diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index 67a4a8a..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.09 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0xe277840) reason :GPU Hang -✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index 57057a1..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.85 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f5f7bd95565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f5f7bd9592b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f5f7bd95aaf] -/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f5f7f04eeb2] -/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f5f7f054034] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f5f7bdac8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f5f7f70a950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f5f7b72b5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f5f7b72b668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma-improved] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index 5b54fa8..3cebd78 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 87.62 ± 0.29 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.57 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 247.81 ± 0.75 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.45 ± 0.27 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log index 0bad5ba..37f9fc9 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.27 ± 0.47 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.61 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.61 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.66 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index d7905a1..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.99 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3b2ac4f0) reason :GPU Hang -✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log index bfa5aa4..7ac8d22 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 170.65 ± 0.11 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.54 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 246.64 ± 0.87 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.63 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..285f8a6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.54 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fbe5a1d45a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fbe5a1d496b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fbe5a1d4aef] +/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7fbe5cf42972] +/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7fbe5cf47b0e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fbe5a1ebe5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fbe5d63eab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fbe59b6a5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fbe59b6a668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log index eef29dc..b28b43c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 93.53 ± 0.08 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 22.66 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 250.33 ± 0.67 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.70 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log new file mode 100644 index 0000000..133d093 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 84.50 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fa4112eb5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa4112eb96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa4112ebaef] +/usr/local/lib64/libggml-hip.so.0(+0x2d5a8e2) [0x7fa4141028e2] +/usr/local/lib64/libggml-hip.so.0(+0x2d5fa7e) [0x7fa414107a7e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fa411302e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fa4147d3ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fa410c815b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa410c81668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index fbab93a..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2cfb74f0) on address 0x7fb6c65be000. Reason: Page not present or supervisor privilege. -✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log index 2f9b66e..f6b1cab 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 171.42 ± 0.59 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.69 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 250.13 ± 0.62 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.71 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..0e4b151 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f3b59a565a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3b59a5696b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3b59a56aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d5a8e2) [0x7f3b5c86d8e2] +/usr/local/lib64/libggml-hip.so.0(+0x2d5fa7e) [0x7f3b5c872a7e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3b59a6de5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3b5cf3eab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f3b593ec5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3b593ec668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 89eb656..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f2015391565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f201539192b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2015391aaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f2017d0af12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f2017d12a66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f2017d0ffcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20153abde3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20183de650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20183e02e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20183e51bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20183e600e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7f2014d275b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2014d27668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 7206046..b9717bd 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.10 ± 0.15 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.74 ± 2.03 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.74 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 69% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log index 86798dc..f90aec0 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.62 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x34ae58c0) reason :GPU Hang -✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.80 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x107a8d10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index da2e733..a4a0e10 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.39 ± 0.25 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.13 ± 0.85 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.73 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 71% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log index 8e28f5f..e66c0c4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.91 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1d99e5c0) reason :GPU Hang -✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.91 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x1f16bd10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4116dc0..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.30 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1feee5c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 989d06a..f8bb9fc 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 135.50 ± 0.06 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.19 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 333.45 ± 1.70 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.33 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log index b971d30..a9adb30 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 77.55 ± 0.60 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 98.64 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.16 ± 0.00 | -build: ee8dd5c65 (7035) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 32cbd2d..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 30.21 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.28 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 44a30df..1052ad9 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.61 ± 0.32 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.78 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 336.20 ± 2.04 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.77 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log index 3b6288e..8226b0c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.57 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.38 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 98.44 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.88 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 99de90f..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.50 ± 0.48 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.72 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log similarity index 79% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index 3ae2157..c2a507e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 167.68 ± 0.26 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.36 ± 0.16 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..54e1c24 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 47.07 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f6af45f15a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6af45f196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6af45f1aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f6af734e682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f6af735385e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f6af4608e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f6af7a23ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f6af3f875b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6af3f87668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..63f3a20 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.91 ± 1.10 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.68 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..02f852c --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.62 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f95789005a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f957890096b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f9578900aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f957b65d682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f957b66285e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f9578917e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f957bd32ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f95782965b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f9578296668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..555b577 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.90 ± 1.42 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log index d4885f1..e9e7670 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log @@ -2,8 +2,7 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Hip error: 'an illegal memory access was encountered'(700) at /longer_pathname_so_that_rpms_can_support_packaging_the_debug_info_for_all_os_profiles/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:147 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.41 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3c42f5c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) +✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..7a33710 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 329.23 ± 1.32 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.83 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..849d02b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb26dd2a5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb26dd2a96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb26dd2aaef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb270b325f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fb270b377ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fb26dd41e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fb271232ab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7fb26d6c05b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb26d6c0668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log deleted file mode 100644 index 6263f58..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.42 ± 0.19 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.70 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index e644886..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.83 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 922b095..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 172.08 ± 0.59 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9854413..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.69 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.85 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 159357c..92f1c15 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 72.38 ± 0.12 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.73 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.77 ± 1.72 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.70 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..3a80892 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.38 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ffa533f15a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ffa533f196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ffa533f1aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7ffa5614e682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7ffa5615385e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7ffa53408e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7ffa56823ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7ffa52d875b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7ffa52d87668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 43e10b1..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.40 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1678a6f0) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index e11d2bb..1d3fff3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 131.85 ± 0.23 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 323.19 ± 0.84 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.69 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..568e806 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.51 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff4771b65a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff4771b696b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff4771b6aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7ff479f13682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7ff479f1885e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7ff4771cde5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7ff47a5e8ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7ff476b4c5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff476b4c668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index e87dc1b..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.15 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x106c26f0) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log index 2fe705c..22ca86f 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 99.25 ± 0.11 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.54 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.87 ± 0.79 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.59 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..508ce88 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,17 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +:0:rocdevice.cpp :3582: 48997963017 us: Callback: Queue 0x7ff041800000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 +Hip error: 'an illegal memory access was encountered'(700) at /therock/src/rocm-libraries/projects/hipblaslt/library/src/amd_detail/hipblaslt.cpp:147 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +Kernel Name: _ZL15flash_attn_tileILi128ELi128ELi16ELi4ELb0EEvPKcS1_S1_S1_S1_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS5_IjLj3EEiiiiiiiiiiiliiliiiiil +VGPU=0xe715690 SWq=0x7ff143a14000, HWq=0x7ff041800000, id=3 + Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 + grid=[4096, 8, 24], workgroup=[32, 8, 1] + private_seg_size=0, group_seg_size=33792 + kernel_obj=0x7fdfbe030100, kernarg_address=0x0x7ff040801600 + completion_signal=0x0, correlation_id=0 + rptr=15, wptr=47 + ✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 3dccbf9..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3c42b6f0) on address 0x7f0a849aa000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index 364824c..4a13b64 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 169.93 ± 0.33 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 21.71 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 330.19 ± 0.73 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 21.82 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..bf772f6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f126437b5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f126437b96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f126437baef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f12671835f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f12671887ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1264392e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f1267858ab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f1263d115b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1263d11668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6e8957c..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.82 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 47eed24..504b7db 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 217.91 ± 0.48 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.50 ± 0.06 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 228.89 ± 0.52 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.48 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log index 1311ae3..0483c52 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.82 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.79 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 40.49 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 9.30 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log index 5601ee8..3653d58 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 235.07 ± 0.58 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.84 ± 0.01 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 243.57 ± 0.43 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 24.54 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log index 4eb5776..d1590a2 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.43 ± 0.00 | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.49 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 52.62 ± 0.00 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 14.35 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log index a71c62c..3c85cf5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 86.50 ± 0.17 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.61 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 194.43 ± 0.27 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.65 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..bd8de34 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 36.61 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f4fa9af05a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4fa9af096b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f4fa9af0aef] +/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7f4fac85e972] +/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7f4fac863b0e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f4fa9b07e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f4facf5aab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f4fa94865b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4fa9486668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log index 9b05d14..c437cd4 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.67 ± 0.37 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.23 ± 0.26 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.64 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 2e3ff57..2e0cf96 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.60 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.34 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 36.83 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.40 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log index 70e2488..fae0037 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 74.73 ± 0.27 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.66 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.45 ± 0.65 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.31 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 62% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log index 64bb317..3ebab6a 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log @@ -1,4 +1,4 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 3d49e46..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.97 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.58 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log index 55890b0..c56811e 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 151.03 ± 0.71 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 195.71 ± 0.70 | | glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 55% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log index 1268260..3ebab6a 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -2,5 +2,3 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x320d58b0) reason :GPU Hang -✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index b139639..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,28 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f1a5d310565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f1a5d31092b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1a5d310aaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f1a5fc89f12] -/usr/local/lib64/libggml-hip.so.0(+0x28ce0d7) [0x7f1a5fc9b0d7] -/usr/local/lib64/libggml-hip.so.0(+0x28cccd1) [0x7f1a5fc99cd1] -/usr/local/lib64/libggml-hip.so.0(+0x28cb92c) [0x7f1a5fc9892c] -/usr/local/lib64/libggml-hip.so.0(+0x28c645a) [0x7f1a5fc9345a] -/usr/local/lib64/libggml-hip.so.0(+0x28c2f0a) [0x7f1a5fc8ff0a] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f1a5fc8efcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f1a5d32ade3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f1a6035d650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f1a6035f2e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f1a603641bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f1a6036500e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7f1a5cca65b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f1a5cca6668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 16c69fb..f984551 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 125.43 ± 0.26 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.48 ± 0.14 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 275.04 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.57 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 69% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log index 15a1325..942f525 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 14.06 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x97894f0) reason :GPU Hang -✖ ! [rocm-7alpha-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.70 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x9cb5d10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index bc468da..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x3a41b5c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index a489899..8422388 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 140.41 ± 0.79 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.52 ± 0.05 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 272.75 ± 1.25 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.56 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 68% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log index 6fab8e3..b9c5f49 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.65 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x14c455c0) reason :GPU Hang -✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 33.85 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x2738fd10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7e4d816..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 13.20 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x2b2915c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 4d8bb2e..45ffea3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.63 ± 0.70 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.38 ± 0.34 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.52 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log index 8263fe5..6b49d27 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 92.38 ± 0.37 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.64 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 92.73 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.12 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 08a0928..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 26.75 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.10 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 2a333e8..c29adb3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.79 ± 0.11 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.57 ± 0.06 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 277.33 ± 0.75 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.62 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log similarity index 69% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log index 7c0958d..226091a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.07 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x231278c0) reason :GPU Hang -✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 92.73 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3c5c0d10) reason :GPU Hang +✖ ! [rocm6_4_4] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 5dd5559..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 69.31 ± 0.07 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.50 ± 0.09 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index f95e8b9..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 136.65 ± 0.08 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.46 ± 0.16 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5bb8ae4..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.05 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3b6528c0) reason :GPU Hang -✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log similarity index 79% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log index 63a07c8..ab6d7f5 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 147.75 ± 0.96 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.69 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.32 ± 0.84 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.51 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..594c729 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.17 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75321e15a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75321e196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75321e1aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f7534f3e682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f7534f4385e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f75321f8e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f7535613ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f7531b775b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f7531b77668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log similarity index 62% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log index 9e8dc16..bb9476b 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 274.07 ± 3.25 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.04 ± 1.12 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..2312744 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 46.53 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f042c8285a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f042c82896b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f042c828aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f042f585682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f042f58a85e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f042c83fe5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f042fc5aab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f042c1be5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f042c1be668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..ded8528 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 257.70 ± 0.50 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.59 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..d557084 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,23 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fca6c0ef5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fca6c0ef96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fca6c0efaef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fca6eef75f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fca6eefc7ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7dd) [0x7fca6c10a46d] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fca6f5f87e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fca6f5fa2b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fca6f5ff6ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fca6f6004fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x408a57] +/lib64/libc.so.6(+0x35b5) [0x7fca6ba855b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fca6ba85668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..499a6d6 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 259.40 ± 0.46 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.61 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..4e96acb --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f4b572795a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f4b5727996b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f4b57279aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f4b5a0815f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f4b5a0867ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f4b57290e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f4b5a781ab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f4b56c0f5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f4b56c0f668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log deleted file mode 100644 index 93400e6..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 94.32 ± 0.20 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.25 ± 0.53 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 3ba3a60..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3cdc48c0) on address 0x7f1399b6e000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 2543587..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.72 ± 0.76 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.54 ± 0.04 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index df308dc..080c5ce 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 114.56 ± 0.18 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.58 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 254.22 ± 1.28 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.50 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..4e835eb --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 45.90 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc72ee915a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc72ee9196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc72ee91aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc731bee682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc731bf385e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc72eea8e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc7322c3ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fc72e8275b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc72e827668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 53cfcab..65abdf8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 159.14 ± 0.64 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.44 ± 0.20 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 253.25 ± 1.33 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.53 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..34c164b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 45.93 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc83c7145a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc83c71496b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc83c714aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc83f471682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc83f47685e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc83c72be5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc83fb46ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fc83c0aa5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc83c0aa668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6c2b70e..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.46 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x2566c6f0) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log index 09b5d9e..522f67c 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 96.45 ± 0.26 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.51 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 258.89 ± 0.25 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 16.54 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log index e1e7b9a..32379f8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.46 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.65 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 79.91 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.08 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 7e0757d..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x19b4f6f0) on address 0x7f5ea34ff000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 8b61cc2..c5c6987 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,26 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 130.86 ± 0.36 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.53 ± 0.06 | - -build: 1c398dc9e (7034) +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f48fca035a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f48fca0396b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f48fca03aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f48ff80b5f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d5fe47) [0x7f48ff81fe47] +/usr/local/lib64/libggml-hip.so.0(_Z19ggml_cuda_mul_mat_qR25ggml_backend_cuda_contextPK11ggml_tensorS3_S3_PS1_+0x7d3) [0x7f48ff98aba3] +/usr/local/lib64/libggml-hip.so.0(+0x2d5802c) [0x7f48ff81802c] +/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7f48ff813e28] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f48ff81083f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f48fca1e483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f48ffee17e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f48ffee32b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f48ffee86ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f48ffee94fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f48fc3995b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f48fc399668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..786a2a8 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb299bf75a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb299bf796b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb299bf7aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb29c9ff5f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7fb29ca047ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fb299c0ee5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fb29d0d4ab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7fb29958d5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb29958d668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index ca57bca..0000000 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 36.62 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x2d1506f0) reason :GPU Hang -✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 658f23d..da28589 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 261.54 ± 1.01 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.42 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 279.25 ± 0.28 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.61 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log index cd48585..e54f589 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 23.19 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 5.25 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 42.15 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 7.96 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log index df5cc5b..8a162f3 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 233.87 ± 0.08 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.74 ± 0.01 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 244.36 ± 0.45 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.73 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log index 9384aac..512278a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 33.31 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.00 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 54.92 ± 0.00 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 11.62 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index b4d3bb4..d927b34 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.65 ± 0.07 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.74 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log index 7ce85ee..811a47d 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.85 ± 0.10 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 23.88 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.52 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log index 561ddfe..c040916 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 103.07 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.41 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 8742dc5..2466e1d 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.88 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 24.05 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.52 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 5c6dd7f..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.71 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f295ddb7565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f295ddb792b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f295ddb7aaf] -/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f2960686fb2] -/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f296068c004] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f295ddce8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f2960d2e950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f295d74d5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f295d74d668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log index 1412b4c..4abdd5e 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.64 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.85 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log index d5ffe77..8007afd 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.96 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.47 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.61 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 73c9fac..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 33.13 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.31 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log index 508f69e..19f1cc0 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 102.84 ± 0.31 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 65.38 ± 0.05 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log index 896060c..33e4c02 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 12.76 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 0.80 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 37.86 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.51 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8d3bd91..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.86 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.28 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index f70644b..de26d53 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.56 ± 0.06 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..8b47c40 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 26.43 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.85 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index e920862..bb0fc76 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.23 ± 0.06 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 144.36 ± 0.18 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 75% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log index e8d7c87..1022078 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.74 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1ed645c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 26.46 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x31c9cd10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 46d4498..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 11.69 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x147425c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index e90399b..03f0f5b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 105.28 ± 0.10 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.77 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.01 ± 0.05 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..3d5ade1 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 56.24 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.61 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 4e15bb4..0ff9545 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.32 ± 0.29 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.78 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.28 ± 0.12 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..9e418ec --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 56.12 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.60 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index f1154a4..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.35 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.40 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index bfc8cc3..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.80 ± 0.11 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 56dac28..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.25 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 73d833d..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 102.92 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 94875a6..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.47 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log similarity index 87% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log index 661ebf4..6224321 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.51 ± 0.07 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 146.01 ± 0.05 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..57c1117 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.23 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f2519e175a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2519e1796b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f2519e17aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f251cb74682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7f251cb7985e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f2519e2ee5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f251d249ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f25197ad5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f25197ad668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..f6db3a2 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.94 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..abd30e6 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 34.82 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.86 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..cc7a19c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 147.07 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..2ac9624 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f3d030795a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3d0307996b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3d03079aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f3d05e815f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d507ce) [0x7f3d05e867ce] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3d03090e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3d06581ab0] +/usr/local/bin/llama-bench() [0x40adbc] +/usr/local/bin/llama-bench() [0x408b3d] +/lib64/libc.so.6(+0x35b5) [0x7f3d02a0f5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3d02a0f668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..9b4a794 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.12 ± 0.04 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..22906b2 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 52.68 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log deleted file mode 100644 index 0b55222..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.28 ± 0.10 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 38507b6..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 34.15 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.41 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index def3e03..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.30 ± 0.24 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 165a9b2..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 35.54 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.46 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index a9b07ac..a0078fb 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 103.21 ± 0.08 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.84 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log index f12bba5..f6357ab 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.08 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 1.07 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.12 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.85 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 6afed62..94c795b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 96.88 ± 0.10 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 143.47 ± 0.06 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index 10b0e83..a58a6dd 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 32.03 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.43 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 35.06 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 1.82 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index f4be44f..4598534 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 104.95 ± 0.14 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.96 ± 0.08 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..25b568d --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,28 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f357c5865a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f357c58696b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f357c586aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f357f38e5f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d5fe47) [0x7f357f3a2e47] +/usr/local/lib64/libggml-hip.so.0(+0x2d5e41c) [0x7f357f3a141c] +/usr/local/lib64/libggml-hip.so.0(+0x2d5d5e7) [0x7f357f3a05e7] +/usr/local/lib64/libggml-hip.so.0(+0x2d580cb) [0x7f357f39b0cb] +/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7f357f396e28] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f357f39383f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f357c5a1483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f357fa647e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f357fa662b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f357fa6b6ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f357fa6c4fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f357bf1c5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f357bf1c668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index f91c6b2..4d9e8ad 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 97.99 ± 0.15 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 2.79 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 145.24 ± 0.16 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.79 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 67% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log index 57f14ae..4b7c8ce 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,6 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 15.05 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x13295700) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 52.70 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 2.57 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3496552..0000000 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2c2c1700) on address 0x7fb65e7a9000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 92ee7d9..3c6c07c 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 98.55 ± 0.11 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 100.73 ± 0.26 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.80 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log index 9a19b40..22f5b61 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 18.09 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.12 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 30.75 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 2.46 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index 6df56ae..022269b 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 86.56 ± 0.57 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 88.29 ± 0.76 | | llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log index 937c694..fafeab9 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.40 ± 0.00 | -| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 2.36 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 31.75 ± 0.00 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 2.56 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log deleted file mode 100644 index 5e208e9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 263.94 ± 2.74 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.18 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index 6caba6b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 155.11 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3ede5840) reason :GPU Hang -✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log deleted file mode 100644 index 4f0c03b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f39038cd565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f39038cd92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f39038cdaaf] -/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f3906b86eb2] -/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f3906b8eb36] -/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f3906b8c09f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f39038e7de3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f3907243650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f39072452e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f390724a1bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f390724b00e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7f39032635b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3903263668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8e5e336..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 152.66 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.29 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 70dc86e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.53 ± 2.86 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.03 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 968b5d4..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.57 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.85 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 95168a7..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f3efb9fa565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f3efb9fa92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f3efb9faaaf] -/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f3efe2c9fb2] -/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f3efe2cf004] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f3efba118ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f3efe971950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f3efb3905b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f3efb390668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index 267824f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.71 ± 1.81 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.19 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 6a249ea..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.24 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x143664f0) reason :GPU Hang -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index a61d3a6..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f20b4ffb565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f20b4ffb92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f20b4ffbaaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f20b7974f12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f20b797ca66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f20b7979fcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f20b5015de3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f20b8048650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f20b804a2e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f20b804f1bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f20b805000e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7f20b49915b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f20b4991668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6f8c70b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fe4591ff565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fe4591ff92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fe4591ffaaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fe45bb78f12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fe45bb80a66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fe45bb7dfcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fe459219de3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fe45c24c650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fe45c24e2e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fe45c2531bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fe45c25400e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7fe458b955b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fe458b95668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index d59b4c3..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 289.25 ± 1.39 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 6dafb67..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 102.08 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.97 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index a640b5e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.12 ± 0.61 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4d1e5f9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.11 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1603e5c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index a75b786..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 290.60 ± 0.66 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.04 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 3eb4ab4..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.09 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.17 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index effe5f2..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.38 ± 1.41 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.18 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d874dfa..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2e6405c0) reason :GPU Hang -✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index bf1c76d..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 293.23 ± 0.81 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 06beff3..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 128.37 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.22 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5dd6ccc..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.19 ± 1.41 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7347e66..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.25 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x1daad8d0) reason :GPU Hang -✖ ! [rocm7.1-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index e3d8bfe..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.62 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x38db28d0) reason :GPU Hang -✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index a2eb5aa..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 294.05 ± 2.13 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 79ba6ac..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.32 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.96 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 3e83b8d..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 126.89 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.26 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 6dd5a2d..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.84 ± 2.59 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.17 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index b16c16b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.46 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.33 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 5a01db9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.02 ± 1.98 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.13 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 03f663c..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.12 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.64 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index a54a3f8..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 282.70 ± 0.57 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.15 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 951edb7..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.42 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.16 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log deleted file mode 100644 index 729dcff..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 272.42 ± 2.18 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index c010215..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 149.67 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.51 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index 97aa215..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.04 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f0845525565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f084552592b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f0845525aaf] -/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f08487deeb2] -/usr/local/lib64/libggml-hip.so.0(+0x3204034) [0x7f08487e4034] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f084553c8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f0848e9a950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f0844ebb5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f0844ebb668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index e475de4..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 273.57 ± 2.62 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.18 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 5040a52..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.17 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.86 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index c054b2f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.41 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.67 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log deleted file mode 100644 index fd71fc8..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 274.27 ± 3.87 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index aa2db73..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 190.45 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x145324f0) reason :GPU Hang -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 2f93317..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f6a6bb84565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f6a6bb8492b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f6a6bb84aaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f6a6e4fdf12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7f6a6e505a66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7f6a6e502fcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f6a6bb9ede3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f6a6ebd1650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f6a6ebd32e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f6a6ebd81bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f6a6ebd900e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x40816d] -/lib64/libc.so.6(+0x35b5) [0x7f6a6b51a5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f6a6b51a668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0f79dc0..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7fa8c83e4565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa8c83e492b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fa8c83e4aaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7fa8cad5df12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7fa8cad65a66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7fa8cad62fcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa8c83fede3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa8cb431650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fa8cb4332e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fa8cb4381bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fa8cb43900e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x408087] -/lib64/libc.so.6(+0x35b5) [0x7fa8c7d7a5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa8c7d7a668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 36c42f9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.39 ± 0.35 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 06a66cd..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.80 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x37c3a5c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 9635772..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.81 ± 2.22 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3a6f4d1..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 99.06 ± 0.00 | -Memory access fault by GPU node-1 (Agent handle: 0x12fb55c0) on address 0x7f33259aa000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_4-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log deleted file mode 100644 index 5f23bac..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.53 ± 3.47 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.12 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 66a18ab..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.57 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.36 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 9d9395a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 298.30 ± 1.55 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.15 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 450f683..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x3dac65c0) on address 0x7f27e4f66000. Reason: Page not present or supervisor privilege. -✖ ! [rocm6_4_4] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index b691060..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 295.26 ± 1.05 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 68d0fd5..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 124.66 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.76 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 514eace..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.62 ± 1.21 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9ce7211..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 124.35 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.68 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log deleted file mode 100644 index b8117ce..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.33 ± 1.49 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index e76e685..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x17e3c8d0) on address 0x7f09927ea000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7.1] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 960b7b2..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 296.32 ± 1.14 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3fdb828..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.17 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.77 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 1dd56b4..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 291.43 ± 1.53 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.01 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 514a2f1..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.05 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x3beb7700) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 2c41a32..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.94 ± 1.41 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.16 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0dfe4b0..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.25 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.36 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log deleted file mode 100644 index 3ed1431..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 268.04 ± 46.82 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.14 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 8d2c005..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.94 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.70 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index c54c53f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 287.00 ± 2.42 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 12.17 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index c15c72a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.54 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.69 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log deleted file mode 100644 index 49022c7..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 349.58 ± 2.09 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log deleted file mode 100644 index e8bb713..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 313.68 ± 2.67 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.49 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index fc63c97..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 154.48 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.04 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log deleted file mode 100644 index 1ecb7ad..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 312.46 ± 3.80 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.50 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index 84da9ad..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f68ae79e565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f68ae79e92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f68ae79eaaf] -/usr/local/lib64/libggml-hip.so.0(+0x31feeb2) [0x7f68b1a57eb2] -/usr/local/lib64/libggml-hip.so.0(+0x3206b36) [0x7f68b1a5fb36] -/usr/local/lib64/libggml-hip.so.0(+0x320409f) [0x7f68b1a5d09f] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f68ae7b8de3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f68b2114650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f68b21162e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f68b211b1bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f68b211c00e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x407edc] -/lib64/libc.so.6(+0x35b5) [0x7f68ae1345b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f68ae134668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma-improved] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 331c05f..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 311.89 ± 2.25 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.49 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 7f13842..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.25 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.06 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 771396a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 313.81 ± 0.68 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.48 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 522bff5..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.58 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.31 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log deleted file mode 100644 index 0d891ea..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 314.61 ± 2.38 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 19.52 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 9f7e902..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,7 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x2b16a4f0) on address 0x7fc434965000. Reason: Page not present or supervisor privilege. -:0:rocdevice.cpp :3588: 7137878235 us: Callback: Queue 0x7fc544400000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 217184c..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 315.62 ± 2.64 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 19.51 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0aefcee..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,24 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7effceeac565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7effceeac92b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7effceeacaaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7effd1825f12] -/usr/local/lib64/libggml-hip.so.0(+0x28c4a66) [0x7effd182da66] -/usr/local/lib64/libggml-hip.so.0(+0x28c1fcf) [0x7effd182afcf] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7effceec6de3] -/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7effd1ef9650] -/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7effd1efb2e2] -/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7effd1f001bf] -/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7effd1f0100e] -/usr/local/bin/llama-bench() [0x40a3db] -/usr/local/bin/llama-bench() [0x408087] -/lib64/libc.so.6(+0x35b5) [0x7effce8425b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7effce842668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index ce9ca8c..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.87 ± 1.54 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.54 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index b5c4247..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 101.87 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.83 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3cb0a3e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 306.69 ± 2.02 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 67162a9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 100.29 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.72 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log deleted file mode 100644 index 5205dc6..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.50 ± 4.59 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.57 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 92bc9c9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 209.52 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 560e0ce..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 308.24 ± 0.27 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.59 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 098adc5..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 217.84 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.97 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index cba7d0c..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 304.34 ± 2.51 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.61 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index ce38702..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.29 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 29e45ba..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 305.86 ± 2.98 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.55 ± 0.04 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index bdbec90..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 134.32 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.62 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log deleted file mode 100644 index 041ef73..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 306.39 ± 1.58 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.59 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 6d5608d..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 198.60 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.95 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 8622337..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 309.00 ± 2.30 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.56 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index edb70d8..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.95 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.81 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 1d0bb41..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 305.09 ± 1.49 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 092d4d5..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.13 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.96 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 4e5ac5a..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.67 ± 0.91 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.60 ± 0.02 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 01f0f7e..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.73 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.45 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log deleted file mode 100644 index 132dabf..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 302.22 ± 1.02 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.58 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index ee97d41..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 196.23 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.93 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index d4269f9..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 300.96 ± 2.64 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 18.57 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 77cdb17..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,6 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x1356700) reason :GPU Hang -✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log deleted file mode 100644 index 716fe5b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 195.52 ± 1.35 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.65 ± 0.04 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 41ac42b..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 81.66 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 13.06 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log deleted file mode 100644 index 75cece6..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 230.79 ± 1.84 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.79 ± 0.04 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 4f18e20..0000000 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 89.56 ± 0.00 | -| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.73 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log new file mode 100644 index 0000000..0a7e1b1 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 788.05 ± 2.61 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log index 0fc4ce7..ac3d66f 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 19.45 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.86 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.54 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.56 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..2a91c3d --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 787.49 ± 3.69 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.48 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 60% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index bc091bc..0ae20f9 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 22.08 ± 0.00 | -| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.05 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 136.23 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.56 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log new file mode 100644 index 0000000..f2bbffd --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 799.32 ± 2.30 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log new file mode 100644 index 0000000..99f7301 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.53 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log similarity index 63% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log index ab3540f..5146b25 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 638.38 ± 7.05 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.12 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 804.49 ± 4.05 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: caca0d55c (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..91170dc --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.94 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log similarity index 63% rename from benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log index 2c72757..dcb50e9 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1450.79 ± 15.08 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.27 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 792.57 ± 2.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..6d3e55b --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 121.02 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..8a899fd --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.96 ± 3.40 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.47 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..65b21b6 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 118.46 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log new file mode 100644 index 0000000..176af12 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 802.78 ± 0.92 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..758716e --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 289.71 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log similarity index 63% rename from benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log index 07af172..8016de9 100644 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1438.05 ± 10.26 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.37 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 810.15 ± 2.26 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..e1046e5 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 316.13 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..2eb1c65 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 781.49 ± 1.79 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..4914b28 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 176.40 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..916ac50 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 795.45 ± 1.95 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..47f5829 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 179.22 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log similarity index 63% rename from benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log index b639356..23a623e 100644 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 674.51 ± 4.94 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.42 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 797.09 ± 3.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..4cb4f21 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 261.91 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..036342e --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 800.44 ± 2.67 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..65bfaf3 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.67 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log similarity index 61% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log index c10b3e0..5b64204 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.21 ± 4.33 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 794.13 ± 2.11 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: b447a9a4b (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 60% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log index 51a2109..a3fd4c8 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 292.59 ± 1.05 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 177.29 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log similarity index 61% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log index 64802b1..c886bbe 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 633.61 ± 5.41 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.67 ± 0.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 757.71 ± 0.34 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.49 ± 0.00 | -build: fa5c85a8b (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..f50d2fb --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 175.04 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 6.83 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log new file mode 100644 index 0000000..7395dcc --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 803.93 ± 1.68 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..1ba6a51 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 266.48 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log new file mode 100644 index 0000000..e0dc575 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 767.62 ± 3.68 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..ba3c89c --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 280.71 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 7.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log similarity index 62% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log index 4f33060..8803b27 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 224.36 ± 2.08 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.75 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 228.96 ± 0.25 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 8.20 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log similarity index 63% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log index ea674f2..7d406bf 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 99.90 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 101.86 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 6.76 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log similarity index 62% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log index e10b91a..7712766 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 280.28 ± 1.95 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.57 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp512 | 198.30 ± 1.06 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg128 | 7.57 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log similarity index 63% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log index 0e48ea6..b4d60c2 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 85.49 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 98.61 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 6.84 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log similarity index 63% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log index bb83bac..b935b7e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.34 ± 1.60 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.45 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 575.90 ± 0.60 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.67 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..04b7b08 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 815.96 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 37.94 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log index 1e00f5d..59f12dc 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 601.39 ± 7.96 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.54 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 566.25 ± 5.65 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.75 ± 0.01 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..f2ecfc7 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 818.18 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 37.96 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log new file mode 100644 index 0000000..583fbf1 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 573.96 ± 1.31 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log new file mode 100644 index 0000000..93a9c1f --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 840.39 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.57 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log new file mode 100644 index 0000000..38ae77f --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 568.52 ± 6.46 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.75 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..69cc8c3 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 842.90 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.55 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log new file mode 100644 index 0000000..5e2df6e --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.52 ± 3.84 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..1b85141 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1161.59 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.22 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 6b73205..f7bf3a0 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.70 ± 2.25 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.59 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1075.82 ± 2.72 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.19 ± 0.01 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..69b8f82 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1159.76 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.28 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..5ce08c0 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1111.90 ± 4.65 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.13 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..857e033 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1083.84 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.84 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log new file mode 100644 index 0000000..e818cc0 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.40 ± 7.61 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.17 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..00c9009 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1126.17 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.77 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..248c243 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.04 ± 2.67 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.43 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..1b323f7 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1034.85 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.58 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..c6ab5d1 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1073.92 ± 6.88 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.52 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..04dbbae --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1041.85 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.59 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..31d6990 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1119.24 ± 8.14 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.47 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..749e116 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 964.26 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.21 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log index 5902268..6178218 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 650.26 ± 1.03 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.80 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1086.57 ± 5.04 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.44 ± 0.03 | -build: f1840a25d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..61cd367 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 972.22 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.23 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index 6c1a5da..8e0530d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 631.07 ± 4.70 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.89 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1100.11 ± 6.03 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.51 ± 0.01 | -build: b447a9a4b (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..0d652a5 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1036.83 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.63 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..20d0bbe --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1066.41 ± 6.56 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.45 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..6a5e27e --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1030.17 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 38.56 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log index 0c5207d..6d6d161 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 654.79 ± 1.55 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.22 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1110.36 ± 6.45 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.23 ± 0.01 | -build: fa5c85a8b (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..d566191 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1042.68 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log new file mode 100644 index 0000000..9edc424 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | pp512 | 1074.73 ± 4.78 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | 0 | tg128 | 41.51 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..a140be0 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1001.21 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.21 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..dbd7200 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 1224.54 ± 7.71 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 46.56 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 63% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log index 70c1772..31888ea 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 84.26 ± 0.00 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.85 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 569.03 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 39.60 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log similarity index 61% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log index 73410e7..08568b4 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 211.78 ± 1.53 | -| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.73 ± 0.01 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp512 | 940.69 ± 5.60 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg128 | 45.38 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 63% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log index 00f64ac..f7023ef 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 106.42 ± 0.00 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.49 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 489.74 ± 0.00 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 42.30 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log index 73ef692..1c6bafc 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.18 ± 0.54 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.08 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.33 ± 3.19 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.05 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..a42d7d6 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 73.22 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f12702305a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f127023096b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f1270230aef] +/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7f1272f9e972] +/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7f1272fa3b0e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f1270247e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f127369aab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7f126fbc65b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f126fbc6668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index d8bed5d..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 28.32 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0xc4754f0) reason :GPU Hang -✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log index 88c43ca..04baa81 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 142.52 ± 0.12 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.13 ± 0.05 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.28 ± 1.09 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.09 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..871b575 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 72.86 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fd4172615a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd41726196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fd417261aef] +/usr/local/lib64/libggml-hip.so.0(+0x2cb1972) [0x7fd419fcf972] +/usr/local/lib64/libggml-hip.so.0(+0x2cb6b0e) [0x7fd419fd4b0e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fd417278e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fd41a6cbab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fd416bf75b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd416bf7668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index df74048..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,29 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 29.46 ± 0.00 | -:0:rocdevice.cpp :3588: 50932421658 us: Callback: Queue 0x7f8e6a000000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 -Kernel Name: _ZL18flash_attn_ext_vecILi128ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiiiiiiiiiiiiiliiliiiiil -VGPU=0x94e06a0 SWq=0x7f8e6cbea000, HWq=0x7f8e6a000000, id=2 - Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 - grid=[32, 68, 64], workgroup=[32, 4, 1] - private_seg_size=0, group_seg_size=4352 - kernel_obj=0x7f8e6a78f180, kernarg_address=0x0x7f738bd49400 - completion_signal=0x0, correlation_id=0 - rptr=1368490, wptr=1369554 - /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f8e79498565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f8e7949892b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f8e79498aaf] -/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f8e7bd67fb2] -/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f8e7bd6d004] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f8e794af8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f8e7c40f950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f8e78e2e5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f8e78e2e668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log index 17446eb..57846b1 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.03 ± 1.12 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 194.82 ± 1.18 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.06 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log index ec0c47e..c5d2182 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 141.46 ± 1.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 16.01 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 63.02 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.73 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 8ecc87e..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.64 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.44 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log index 8c3fd70..466d7a6 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.69 ± 0.99 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.07 ± 0.05 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 196.80 ± 1.26 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.07 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log index cd33aff..3220457 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.54 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.81 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 65.07 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.83 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1eaf21c..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 38.47 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.20 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 6dda78b..3d604c2 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.65 ± 1.06 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.07 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 198.21 ± 1.42 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 75% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log index f4da2f8..b607ba2 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 24.32 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x33d785c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 66.78 ± 0.00 | +HW Exception by GPU node-1 (Agent handle: 0x3bf1cd10) reason :GPU Hang +✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index d576265..934a790 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.82 ± 1.43 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.11 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 202.49 ± 1.92 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log similarity index 54% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log index c90d084..a8ac7a1 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -4,6 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 24.00 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x252355c0) reason :GPU Hang -✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 69.68 ± 0.00 | +Memory access fault by GPU node-1 (Agent handle: 0x168dcd10) on address 0x7fc71a789000. Reason: Page not present or supervisor privilege. +:0:rocdevice.cpp :2992: 91563147890 us: Callback: Queue 0x7fe05ae00000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 +✖ ! [rocm6_4_4-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log index 6a140e6..bb2c5e9 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.45 ± 0.58 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.88 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 205.39 ± 1.95 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.83 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log similarity index 54% rename from benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log index dbc6872..8924df7 100644 --- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log @@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x2dba35c0) reason :GPU Hang -✖ ! [rocm6_4_4] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1__longctx32768 failed (exit 134) +HW Exception by GPU node-1 (Agent handle: 0x35d3ed10) reason :GPU Hang +✖ ! [rocm6_4_4] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index c1231ef..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.05 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.63 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log index aec88c2..287edc5 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.30 ± 1.01 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.98 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.21 ± 1.57 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.97 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..164727d --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x1c519d10) on address 0x7eff38303000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1371602..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 48.23 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.36 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 2b82621..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.51 ± 1.04 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index f55ebec..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.99 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x169288d0) reason :GPU Hang -✖ ! [rocm7.1-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5cede6c..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.56 ± 0.30 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.16 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 82c98a6..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.82 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.99 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log similarity index 77% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log index 24469f8..90e4554 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 140.40 ± 0.48 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.93 ± 0.23 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.07 ± 0.88 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.01 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log index af2a816..564eb79 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.58 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.43 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 65.17 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.26 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..3d9aad0 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 195.14 ± 1.06 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.08 ± 0.05 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..960c254 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 64.99 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fc8c7bfb5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fc8c7bfb96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fc8c7bfbaef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7fc8ca958682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7fc8ca95d85e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7fc8c7c12e5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7fc8cb02dab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7fc8c75915b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fc8c7591668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..aee2b6f --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.48 ± 2.40 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.14 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..6d4a794 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,27 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff4276395a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff42763996b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff427639aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7ff42a4415f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d5f0bb) [0x7ff42a4550bb] +/usr/local/lib64/libggml-hip.so.0(+0x2d5d5e7) [0x7ff42a4535e7] +/usr/local/lib64/libggml-hip.so.0(+0x2d580cb) [0x7ff42a44e0cb] +/usr/local/lib64/libggml-hip.so.0(+0x2d53e28) [0x7ff42a449e28] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7ff42a44683f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7ff427654483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7ff42ab427e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7ff42ab442b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7ff42ab496ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7ff42ab4a4fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7ff426fcf5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff426fcf668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..59896b1 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f75f10bf5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f75f10bf96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f75f10bfaef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7f75f3ec75f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7f75f3ed1ff5] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7f75f3ecc83f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f75f10da483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f75f45c87e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f75f45ca2b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f75f45cf6ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f75f45d04fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7f75f0a555b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f75f0a55668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log similarity index 54% rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log index da835a1..3ebab6a 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -2,5 +2,3 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -HW Exception by GPU node-1 (Agent handle: 0x123888c0) reason :GPU Hang -✖ ! [rocm7.1] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log deleted file mode 100644 index eecc347..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.23 ± 0.15 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.00 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 5878978..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 39.93 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.45 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 53c4e7e..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 146.00 ± 0.62 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.97 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6c54dcb..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 40.14 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.09 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log index 2e72a6e..04307a9 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 144.65 ± 0.59 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.20 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 197.33 ± 0.72 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.13 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..977deb3 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -0,0 +1,20 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 62.78 ± 0.00 | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7efc563255a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7efc5632596b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7efc56325aef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7efc59082682] +/usr/local/lib64/libggml-hip.so.0(+0x2ca585e) [0x7efc5908785e] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7efc5633ce5e] +/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7efc59757ab0] +/usr/local/bin/llama-bench() [0x408c12] +/lib64/libc.so.6(+0x35b5) [0x7efc55cbb5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7efc55cbb668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 9a2c46e..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 27.94 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.94 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 019785a..2fd430d 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 142.70 ± 0.16 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.19 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.34 ± 1.19 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 15.09 ± 0.06 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index 2c848be..c1ee622 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 39.50 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 9.24 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 62.68 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 5.35 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 92dfe4f..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,9 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 28.48 ± 0.00 | -HW Exception by GPU node-1 (Agent handle: 0x66cc700) reason :GPU Hang -✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log index c9db7cc..bb619c0 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 145.17 ± 0.07 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.03 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 201.37 ± 1.76 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.91 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..0e71630 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7fb6064785a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb60647896b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7fb606478aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7fb6092805f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7fb60928aff5] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7fb60928583f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fb606493483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb6099567e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7fb6099582b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7fb60995d6ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7fb60995e4fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7fb605e0e5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb605e0e668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log index 20fdc35..ba426aa 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 143.77 ± 0.88 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 15.01 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 199.51 ± 1.70 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 14.98 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..35b0598 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,24 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7ff5be8415a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7ff5be84196b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7ff5be841aef] +/usr/local/lib64/libggml-hip.so.0(+0x2d4b5f2) [0x7ff5c16495f2] +/usr/local/lib64/libggml-hip.so.0(+0x2d55ff5) [0x7ff5c1653ff5] +/usr/local/lib64/libggml-hip.so.0(+0x2d5083f) [0x7ff5c164e83f] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7ff5be85c483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7ff5c1d1f7e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7ff5c1d212b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7ff5c1d266ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7ff5c1d274fe] +/usr/local/bin/llama-bench() [0x40ad9b] +/usr/local/bin/llama-bench() [0x4088ac] +/lib64/libc.so.6(+0x35b5) [0x7ff5be1d75b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7ff5be1d7668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1062bdd..0000000 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,7 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -Memory access fault by GPU node-1 (Agent handle: 0x367b7700) on address 0x7f01ade91000. Reason: Page not present or supervisor privilege. -:0:rocdevice.cpp :3582: 3127764077 us: Callback: Queue 0x7f1ba1300000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016 -✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1__longctx32768 failed (exit 134) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log index 9cb68ed..9cfba84 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 139.19 ± 0.25 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.45 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 145.16 ± 0.17 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.77 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log index cd77384..8419851 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 16.94 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 31.17 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 5.72 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log index e5c8408..93bee5e 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 128.55 ± 1.17 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.47 ± 0.01 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 131.53 ± 1.13 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 18.08 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log index ee7cbf7..e397722 100644 --- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 22.82 ± 0.00 | -| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 6.88 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 34.22 ± 0.00 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 9.66 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index 30d4f60..5730ae3 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 483.01 ± 4.72 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.05 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 489.10 ± 3.76 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log index 0a2e7d3..4d63c40 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 478.10 ± 4.01 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.92 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.60 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.97 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log index 7a1044d..0f8a52e 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 345.22 ± 23.61 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.84 ± 0.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 491.47 ± 1.44 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.14 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 42a9ab9..95b1cb9 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.19 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.06 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 271.39 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 14.61 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 55cb214..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 135.26 ± 0.00 | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f83b9245565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f83b924592b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f83b9245aaf] -/usr/local/lib64/libggml-hip.so.0(+0x2812fb2) [0x7f83bbb14fb2] -/usr/local/lib64/libggml-hip.so.0(+0x2818004) [0x7f83bbb1a004] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f83b925c8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f83bc1bc950] -/usr/local/bin/llama-bench() [0x408242] -/lib64/libc.so.6(+0x35b5) [0x7f83b8bdb5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f83b8bdb668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log index 94a2f44..71be6fa 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 482.27 ± 5.93 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 491.95 ± 0.75 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log index a4cc1c5..8240393 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.75 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 336.95 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.45 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 461c2aa..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.31 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.02 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log index 28d5bcc..86257e9 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 360.93 ± 3.44 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.17 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 492.47 ± 3.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.15 ± 0.01 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log index 51c3480..2f690ca 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.27 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.44 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 318.73 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.44 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index cd6acc8..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 197.49 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.17 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index db05a01..911023a 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 479.75 ± 5.18 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.90 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 567.78 ± 2.40 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..3aa46ab --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 268.54 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.12 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 07d269e..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.91 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.52 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index ff5b133..2b2c080 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 360.25 ± 7.40 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.84 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.99 ± 6.42 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..bbeaf1a --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 255.99 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.07 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index ee12796..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 107.25 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.50 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index 3ba2948..b35dbc0 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 493.29 ± 1.77 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.72 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.13 ± 3.25 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 26.13 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..d6e3121 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,6 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +Memory access fault by GPU node-1 (Agent handle: 0x3b630d10) on address (nil). Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index c9857c4..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 208.98 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.97 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 89121d5..f3428c4 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 362.53 ± 2.36 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.80 ± 0.00 | - -build: 1c398dc9e (7034) +HW Exception by GPU node-1 (Agent handle: 0x3cc03d10) reason :GPU Hang +✖ ! [rocm6_4_4] Qwen3-30B-A3B-BF16-00001-of-00002__hblt0__fa1 failed (exit 0) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..35fc41e --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 377.73 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.29 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index f1ed352..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 244.87 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.02 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index be5c585..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 484.23 ± 1.92 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.12 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index cfb2c8a..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.05 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 31947ad..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 357.45 ± 1.20 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.18 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 33f61ce..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 127.49 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.57 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..b8f1d36 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 460.49 ± 1.91 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.12 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..e5274e9 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.40 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 17.98 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index 145d641..ebf1c94 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 352.23 ± 9.28 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 467.86 ± 1.23 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..04c685f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 224.91 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..7edcf87 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 468.47 ± 2.10 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.08 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..eeddc58 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 360.38 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.31 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..d7db6d1 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 470.04 ± 3.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.02 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..d59ba93 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 337.92 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.32 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log deleted file mode 100644 index 5ae522c..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 485.60 ± 4.06 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.98 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index a157427..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.51 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.95 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 5fdd5ef..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 360.51 ± 0.53 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.98 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6612bb7..0000000 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.18 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.94 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 65616ae..8e01011 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 472.32 ± 1.65 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.13 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 461.77 ± 2.14 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log index d1840b6..3203344 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 125.66 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.60 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.95 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.08 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 1e4b094..363c223 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 348.39 ± 4.07 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.16 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 463.39 ± 4.60 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.15 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index 370b160..769aa5f 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.86 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.04 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 223.07 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 18.04 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log index b987962..d1196f9 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 487.44 ± 3.08 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 26.97 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 466.61 ± 1.68 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log index 8c008da..28bb187 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.05 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 18.97 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 319.20 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.52 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 55406b9..3140710 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 350.49 ± 4.28 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.05 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 473.68 ± 1.69 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 27.11 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log index 121506c..bf51587 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 129.44 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 12.59 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 337.79 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.34 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index f94a365..350bb76 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 191.31 ± 0.05 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 194.36 ± 0.12 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.96 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log index 316a419..d329a30 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 53.97 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.47 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 86.76 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 8.71 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index 78a6ebf..3fa928a 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 165.85 ± 0.14 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.35 ± 0.01 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 167.29 ± 0.18 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 9.36 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log index 9270cda..c038d21 100644 --- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 73.78 ± 0.00 | -| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 7.95 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 102.90 ± 0.00 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 8.57 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log index c54f7f7..e6192c9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 568.92 ± 3.37 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.40 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 883.95 ± 6.89 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.73 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log index 26cf059..2f33b05 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 573.35 ± 5.61 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.97 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 323.55 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.52 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log index 18d9eef..6cc0a0b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 575.31 ± 5.34 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.66 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 888.32 ± 6.59 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.55 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 4e1681f..99edf40 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.75 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.78 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 323.18 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.51 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 63d2bdc..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.86 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.72 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log index 52ebd79..ec5dc7e 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 574.31 ± 5.95 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.21 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 887.98 ± 2.33 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.53 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log index 03c9906..486fb05 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 202.50 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.86 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.12 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.25 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index ce9fa1f..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.06 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.70 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log index 5ba10ae..afb9e9d 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 576.33 ± 7.18 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.48 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 897.81 ± 13.27 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.56 ± 0.01 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log index ea8d8de..a6ee119 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 147.26 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 273.07 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 40.23 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7ef2b2f..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.69 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.79 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log index 4c2afa0..1081a96 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 622.81 ± 3.95 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.81 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1045.84 ± 8.87 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.04 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..e96d662 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 310.27 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.10 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 27ab885..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.56 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.89 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index 6293fb1..40a0181 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.44 ± 4.55 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.89 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1046.62 ± 8.31 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..972826c --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 313.66 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.11 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4ad1908..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.66 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.92 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log index c100d84..96b85d0 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 640.29 ± 6.85 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 56.58 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1065.87 ± 15.74 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.71 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..c0d37cd --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 353.38 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.56 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 931a15a..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 203.61 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.59 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log index ad68852..bc9b8e5 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 632.09 ± 4.14 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.49 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1056.16 ± 8.88 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.68 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..5639307 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 341.15 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.59 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 937aa31..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.05 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.58 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 5e9f0dd..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 620.61 ± 2.27 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.85 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 6a603bd..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.70 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.90 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 92621e0..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 617.37 ± 6.53 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.82 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4180a74..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.17 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.89 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log similarity index 78% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log index 787f080..248c0ca 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 579.57 ± 12.23 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 58.33 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 961.79 ± 10.60 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.69 ± 0.02 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..64780d6 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.57 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.72 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..a6ced1b --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 964.88 ± 9.02 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.78 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..5318173 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.64 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.74 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..8acd7e7 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 992.39 ± 4.30 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.48 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..c655de8 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 286.94 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.51 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..42d8f1f --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.99 ± 7.73 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.39 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..a0ba594 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 284.86 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.29 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log deleted file mode 100644 index db13558..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 628.16 ± 1.90 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.33 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 4ebd080..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.01 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.22 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 8a80c84..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 628.24 ± 3.85 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.33 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0eae3fb..0000000 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 169.20 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.20 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log index 1ec1c21..37e54c9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.38 ± 1.39 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 58.11 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 966.17 ± 7.59 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.85 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log index b26686d..9940008 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.89 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.87 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.45 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.74 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 68c6582..b07a137 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 616.46 ± 1.40 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.69 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 960.50 ± 8.25 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.91 ± 0.02 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index ac04a7e..f29e900 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 168.61 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.27 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 263.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 27.72 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log index 542f5fc..0898dc9 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 625.22 ± 5.42 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.35 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 986.79 ± 6.92 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.42 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log index 6f76e6d..68652c6 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.36 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.25 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 410.85 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.33 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log index 0424b40..9c58c7b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 626.37 ± 6.13 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 57.38 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 984.07 ± 5.87 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 57.52 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log index 475cf3b..29f6134 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.65 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.88 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 282.74 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 39.35 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log index 6ae52c4..66e05a7 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1013.46 ± 4.96 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.10 ± 0.01 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1129.76 ± 4.79 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 62.27 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log index 9c287b6..c56e9f5 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 69.70 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 20.21 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 135.73 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 33.09 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log index b783151..2b85a69 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 853.23 ± 3.21 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.93 ± 0.05 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 905.18 ± 4.26 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 66.46 ± 0.05 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log index 109e2cc..98a3d4b 100644 --- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 104.06 ± 0.00 | -| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 29.97 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 172.42 ± 0.00 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 42.04 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log index 3f45fdd..769297d 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 674.15 ± 10.24 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 71.14 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1189.07 ± 7.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.58 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log index e5dc74f..99786f1 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 673.50 ± 8.17 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 70.76 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 332.99 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.83 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log index c79d672..6ef2d48 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 666.63 ± 5.54 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.62 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.59 ± 10.60 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.65 ± 0.02 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 09931b2..39fd865 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.86 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.65 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 338.96 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.84 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index ac4567c..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.47 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.94 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log index 83e92bc..edf9feb 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 675.10 ± 3.41 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 71.06 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1185.88 ± 9.66 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.64 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log index 126ba4b..eb57e0e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.78 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 295.00 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.59 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index c72b204..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 161.39 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.57 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log index 2ffcac8..4baec9f 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 676.38 ± 1.86 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.44 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1189.90 ± 9.82 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.61 ± 0.01 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log index 95a5c67..8e7e86b 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 150.94 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.92 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 291.08 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.74 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index e16563c..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.70 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.64 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log index 2559d41..c00de91 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 663.26 ± 2.04 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.79 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1201.14 ± 12.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.92 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..4a793ea --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 319.92 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.24 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 2eb5017..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.11 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.39 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log index 2dcdbc5..fd3bded 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 655.75 ± 5.39 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.70 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1208.02 ± 13.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.87 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..37260d9 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 321.94 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.28 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 33683ff..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 109.44 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.45 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log index 3723493..ebb7e8e 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 674.37 ± 11.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 67.62 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1225.68 ± 19.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.13 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..5b810ee --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 387.32 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.53 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 836433f..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.29 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.04 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log index 7fd03c1..cffdfb9 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 665.28 ± 7.13 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.57 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1231.06 ± 2.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.08 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..cd8e169 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 361.60 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.08 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4ae34fe..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.17 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.97 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 584ee09..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 649.91 ± 5.41 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.03 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 836a7e9..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.49 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.48 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index c3d0904..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 658.06 ± 8.10 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.11 ± 0.03 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9c6b809..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.60 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.50 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..50347bc --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1172.29 ± 9.77 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.19 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..1300fbd --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.50 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.99 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log similarity index 62% rename from benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log index f368edd..a50a1bd 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 256.63 ± 1.91 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.18 ± 0.03 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1169.43 ± 4.95 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.31 ± 0.03 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..dafa37b --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.83 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.04 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log similarity index 79% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log index 1242940..0a23174 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 669.29 ± 4.01 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 71.10 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1183.05 ± 9.42 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.17 ± 0.01 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..510d0cb --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.36 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.95 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log similarity index 63% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log index 5a4dab6..0ecb4c7 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 602.73 ± 3.88 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.21 ± 0.01 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1195.38 ± 5.88 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.06 ± 0.01 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..a0bbd29 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 287.87 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.84 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log deleted file mode 100644 index 07b2e4b..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 662.81 ± 8.45 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.77 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 8e3c04e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.05 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.58 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 972095e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 660.13 ± 8.26 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.73 ± 0.03 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8abab36..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 169.50 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.56 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log index 75c65e9..71866cb 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 650.55 ± 3.12 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.01 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1166.57 ± 8.22 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.37 ± 0.05 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log index b0cb357..ee0dbbb 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.47 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.52 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.55 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.01 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log index 7e96a4d..28f33f2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 647.31 ± 2.83 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 69.01 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1167.10 ± 5.32 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 69.33 ± 0.03 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index 157a738..92ee452 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.44 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.63 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.39 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 30.09 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log index 5795548..b6cdbaa 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 660.75 ± 2.92 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.92 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1194.63 ± 7.87 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.98 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log index 4ccb4fd..62b9aa2 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.10 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.53 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.47 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.87 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log index 77e0839..b165442 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 662.51 ± 3.72 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.75 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1202.35 ± 10.49 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.96 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log index d0ef149..531a084 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.66 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.46 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 292.54 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.02 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log index f2ecb66..5d8e040 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 774.61 ± 2.12 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 81.31 ± 0.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 825.86 ± 2.68 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 80.94 ± 0.04 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log index baf1c37..5b95baa 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 68.30 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 21.82 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 130.78 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 37.08 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log index a5ec37f..94bec33 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 832.44 ± 3.18 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 87.24 ± 0.18 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 864.66 ± 2.72 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 83.07 ± 0.04 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log index 528b0a6..8b6ad3a 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 100.73 ± 0.00 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 33.30 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 168.69 ± 0.00 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 47.63 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index 7158506..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 201.48 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.72 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log deleted file mode 100644 index 73b92db..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 587.21 ± 4.27 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.40 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index 14f3269..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 200.93 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 43f023f..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 145.77 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.38 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index bc1d0e0..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 148.73 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.39 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 94073cb..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 160.98 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.64 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index f5ae77f..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 585.58 ± 4.35 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.38 ± 0.01 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 41df426..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.30 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.69 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index c011dfc..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 652.89 ± 1.70 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 45.10 ± 0.01 | - -build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 67fe066..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 110.83 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.52 ± 0.00 | - -build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 73034cd..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 108.95 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | - -build: caca0d55c (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index db0c91d..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 648.39 ± 23.62 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.52 ± 0.01 | - -build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index b2cba3d..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 218.15 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.43 ± 0.00 | - -build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index bb24ded..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.53 ± 6.75 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.87 ± 0.01 | - -build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9ea6ed4..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.10 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.58 ± 0.00 | - -build: 86f1f4411 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index be9c9f7..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.22 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | - -build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index a681ee4..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 634.84 ± 9.56 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.78 ± 0.01 | - -build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index e9be1b9..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.93 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.56 ± 0.00 | - -build: f1840a25d (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log deleted file mode 100644 index d9f4189..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 649.99 ± 3.07 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.58 ± 0.01 | - -build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 1d5d83e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.65 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 | - -build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 6ccec8d..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 640.61 ± 7.82 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 44.69 ± 0.01 | - -build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index c37f41e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.74 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.45 ± 0.00 | - -build: 677be4d78 (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 410a040..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.20 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.54 ± 0.00 | - -build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index edbb267..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 131.72 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.55 ± 0.00 | - -build: b447a9a4b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 3537084..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 237.14 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.14 ± 0.00 | - -build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index d70d55e..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 221.13 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.49 ± 0.00 | - -build: fa5c85a8b (7085) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log deleted file mode 100644 index c79672b..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 1152.51 ± 1.98 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.58 ± 0.02 | - -build: ab5783eb4 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index f4c46b7..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 71.90 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 19.23 ± 0.00 | - -build: ab5783eb4 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log deleted file mode 100644 index 9616a45..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 916.61 ± 3.21 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 45.81 ± 0.01 | - -build: 0a3857fe0 (7089) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index bba9e01..0000000 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 108.80 ± 0.00 | -| qwen3moe 30B.A3B Q8_0 | 33.51 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.33 ± 0.00 | - -build: 0a3857fe0 (7089) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index 9f939b7..005fd2f 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 248.51 ± 1.01 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.33 ± 0.52 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 345.64 ± 0.84 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.88 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..432cbb5 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 344.40 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 20.67 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log index 92a2b7f..75330e1 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 302.76 ± 0.59 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.42 ± 0.29 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 356.53 ± 3.90 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 27.26 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..88740e8 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 352.86 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log index aa006bf..9b4db1a 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 242.26 ± 2.27 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.98 ± 0.07 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 346.29 ± 1.98 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.92 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log new file mode 100644 index 0000000..e6be6ea --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 379.41 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.12 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log index 78cebb9..e2108ce 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 287.99 ± 2.09 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.05 ± 0.16 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 360.22 ± 1.39 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 27.35 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..94e037e --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 386.65 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 25.80 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index e2624cd..0b57963 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 318.50 ± 0.98 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.75 ± 0.39 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 573.57 ± 2.61 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.89 ± 0.00 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log similarity index 56% rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log index 47de304..8c312b8 100644 --- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -4,6 +4,6 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.23 ± 0.00 | -Memory access fault by GPU node-1 (Agent handle: 0x15b288c0) on address 0x7f4ebb38d000. Reason: Page not present or supervisor privilege. -✖ ! [rocm7.1-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1__longctx32768 failed (exit 134) +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 464.91 ± 0.00 | +Memory access fault by GPU node-1 (Agent handle: 0x260d7d10) on address 0x7efad124d000. Reason: Page not present or supervisor privilege. +✖ ! [rocm6_4_4-rocwmma] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index eef8a8f..5dcd687 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 325.56 ± 2.18 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.95 ± 0.39 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 595.88 ± 2.98 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.34 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..10c5d70 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 480.86 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.30 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log index 58ce8b9..31e9e1e 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 320.64 ± 0.80 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.55 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 576.31 ± 0.99 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.64 ± 0.00 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..077eb4c --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 489.52 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.50 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 63f8a06..7269e2e 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 325.06 ± 1.27 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.14 ± 0.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 590.68 ± 0.83 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.38 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..6bfdd77 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 503.34 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 25.09 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 3e7a174..0000000 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 264.35 ± 7.13 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.89 ± 0.59 | - -build: 03d9a77b8 (7278) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 9e0b310..0000000 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 321.93 ± 4.64 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.64 ± 1.29 | - -build: 03d9a77b8 (7278) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..b4f68a7 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 571.05 ± 4.21 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.45 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..5bad71a --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 444.72 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.38 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..6a40914 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.40 ± 3.02 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.73 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..8a21a7c --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,33 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 451.46 ± 0.00 | +:0:rocdevice.cpp :3587: 7778304718 us: Callback: Queue 0x7f5274500000 aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 +Kernel Name: _ZL18flash_attn_ext_vecILi256ELi1EL9ggml_type1ELS0_1ELb0EEvPKcS2_S2_S2_S2_PKiPfP15HIP_vector_typeIfLj2EEffffjfiS6_IjLj3EEiiiiiiiiiiiliiliiiiil +VGPU=0x3fa84a70 SWq=0x7f5276f10000, HWq=0x7f5274500000, id=2 + Dispatch Header =0xb02 (type=2, barrier=1, acquire=1, release=1), setup=0 + grid=[32, 60, 16], workgroup=[32, 4, 1] + private_seg_size=0, group_seg_size=8448 + kernel_obj=0x7f5275b4f600, kernarg_address=0x0x7f3d88d43980 + completion_signal=0x0, correlation_id=0 + rptr=813327, wptr=816270 + /opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:94: ROCm error +/usr/local/lib64/libggml-base.so.0(+0x35a5) [0x7f52b67fb5a5] +/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f52b67fb96b] +/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f52b67fbaef] +/usr/local/lib64/libggml-hip.so.0(+0x2ca0682) [0x7f52b9558682] +/usr/local/lib64/libggml-hip.so.0(+0x2cab085) [0x7f52b9563085] +/usr/local/lib64/libggml-hip.so.0(+0x2ca58cf) [0x7f52b955d8cf] +/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7f52b6816483] +/usr/local/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f52b9c2e7e0] +/usr/local/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe2) [0x7f52b9c302b2] +/usr/local/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x3bf) [0x7f52b9c356ff] +/usr/local/lib64/libllama.so.0(llama_decode+0xe) [0x7f52b9c364fe] +/usr/local/bin/llama-bench() [0x408c92] +/lib64/libc.so.6(+0x35b5) [0x7f52b61915b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f52b6191668] +/usr/local/bin/llama-bench() [0x409c25] +✖ ! [rocm7.1.1-rocwmma] Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__hblt0__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log new file mode 100644 index 0000000..5299717 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 568.38 ± 2.63 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.50 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..04895c4 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 462.35 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 22.01 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..6183815 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 587.50 ± 6.59 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.81 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..1c1bdde --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 479.80 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.62 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 3e28a6b..0000000 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 324.95 ± 0.42 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.54 ± 0.14 | - -build: 03d9a77b8 (7278) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log index b63a3ff..eb49689 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 255.48 ± 2.19 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.31 ± 0.65 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 567.13 ± 2.43 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.44 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..6954010 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 438.42 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.36 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 8e93afb..c22f1e0 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 327.95 ± 1.55 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.64 ± 1.08 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 594.63 ± 11.03 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.76 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..354c810 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 442.41 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 23.66 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log index 211c7a5..f4855bd 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 264.12 ± 2.61 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 24.94 ± 0.15 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 570.28 ± 3.77 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.05 ± 0.01 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log new file mode 100644 index 0000000..4d8d15d --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 473.92 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 24.80 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log index eedcdc9..0d97f79 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 328.73 ± 4.15 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 25.46 ± 0.21 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | pp512 | 588.80 ± 0.73 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 1 | 0 | tg128 | 26.79 ± 0.00 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..534ab2b --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 488.38 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 21.96 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log index 3d755bd..c79c18c 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 417.28 ± 0.97 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.31 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 406.08 ± 1.14 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 33.67 ± 0.02 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log new file mode 100644 index 0000000..e8d9bcb --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 194.48 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 27.27 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log index a88bd50..45b0ac9 100644 --- a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 333.74 ± 1.19 | -| qwen3next ?B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 27.76 ± 0.01 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp512 | 326.83 ± 0.94 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg128 | 30.18 ± 0.02 | -build: 03d9a77b8 (7278) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log new file mode 100644 index 0000000..320c1bb --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 220.99 ± 0.00 | +| qwen3next 80B.A3B Q8_0 | 79.57 GiB | 79.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 27.91 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log index 457da89..361a6d7 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 795.35 ± 0.84 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 421.94 ± 0.25 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log index c222b4c..ecd2fdf 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 751.05 ± 61.73 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 332.05 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.86 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log index 20781ba..a340223 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 804.75 ± 0.44 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 424.57 ± 0.40 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index b2ce95e..8d852c2 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 323.48 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 325.94 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 11.85 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index c48fbb5..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 369.35 ± 42.57 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.04 ± 0.01 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log index 9d36ff9..d060db9 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 816.42 ± 1.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.25 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 429.89 ± 0.35 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log index d51e846..ba5f705 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.43 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.57 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 309.80 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 4fb65df..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.30 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log index 1ce39d9..69b7431 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 826.54 ± 0.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.23 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 430.69 ± 0.83 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.25 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log index 386e6b7..32c78b5 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 354.28 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.03 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 338.46 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.76 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index b3b65b6..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 371.28 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log index 9318d52..a2843f4 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 811.49 ± 0.16 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.14 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 897.73 ± 0.63 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.14 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..312778f --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 288.29 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.01 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index b3f919a..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 151.46 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.18 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log index a3541b4..5bb70aa 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 819.41 ± 1.64 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.15 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 904.28 ± 1.51 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.15 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..7ded8ad --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 290.79 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0e73bbb..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 149.96 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.19 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log index 4a106ef..235334a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 826.24 ± 1.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.24 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.75 ± 0.60 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..db48774 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 425.43 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.72 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index ad48dd5..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 267.36 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.56 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log index b9b9282..184977a 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 833.10 ± 1.71 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 919.42 ± 0.44 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.19 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..616dc46 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 430.76 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.78 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4bd97eb..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 252.65 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.56 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 8eba04b..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 797.32 ± 10.12 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 6aa5b8e..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 180.48 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 1c805a6..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 810.77 ± 1.82 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.18 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6883e75..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 166.10 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log similarity index 79% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log index 6fc839e..bc50843 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 799.75 ± 0.53 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 886.59 ± 0.64 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..fc15a41 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.06 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.05 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..328096e --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 890.97 ± 0.75 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.17 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..64f9cf7 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 304.61 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log new file mode 100644 index 0000000..05f259d --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 910.34 ± 0.61 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..4766a18 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 391.58 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.80 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log similarity index 62% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log index 448f974..9cbc2a8 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 269.91 ± 0.99 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.11 ± 0.05 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.22 ± 0.99 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.28 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..58aaf7a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 394.15 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log deleted file mode 100644 index 97452bc..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 819.82 ± 2.06 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.25 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 2394aec..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 269.95 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log deleted file mode 100644 index d5fe530..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 832.48 ± 2.03 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.26 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 4159fec..0000000 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 266.41 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.58 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log index 7a42f38..c77a98f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 802.23 ± 0.79 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 882.81 ± 0.63 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log index 817f7a7..27debaf 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 170.65 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.20 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 302.60 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.02 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log index 9694972..8c5636c 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 790.38 ± 0.48 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.16 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 891.69 ± 0.16 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.16 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index c8f08ef..828c786 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 265.54 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 306.94 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.03 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log index 0cfd222..522f057 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 813.63 ± 8.49 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.21 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 913.03 ± 1.04 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.26 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log index 8acf2bf..7f19c44 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 252.49 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 11.59 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 399.02 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.77 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log index 5e8c571..19c608d 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 813.23 ± 0.99 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 14.26 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 917.06 ± 0.24 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.27 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log index 5d4eb49..5fd8d29 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.45 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 10.21 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 391.45 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.78 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index be25934..1e29149 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 590.41 ± 71.66 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.51 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 690.02 ± 1.72 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.56 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log index 25c302d..411a64f 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 17.29 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 11.88 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 54.64 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 12.98 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log index 19663a7..649e0ba 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 533.84 ± 0.83 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.99 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 549.78 ± 1.79 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 13.95 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log index 1e15198..266830e 100644 --- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 219.21 ± 0.00 | -| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.99 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 270.10 ± 0.00 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 11.42 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log index 9557b2c..6da9bcd 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 428.59 ± 48.51 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.02 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.46 ± 1.81 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log index 971037c..8996dbe 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 439.12 ± 31.65 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 167.60 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.56 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log index 64c398a..fa35df2 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.23 ± 0.03 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.97 ± 0.37 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index a95bffd..d4ae304 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 112.92 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 170.01 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.56 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 489bab2..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 95.21 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.09 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log index ea9b476..ac992c7 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 409.77 ± 60.03 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.02 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 477.16 ± 1.86 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log index 22fc859..9520a2f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.37 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 237.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.87 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 4643958..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 192.30 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.57 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log index b1e0c7c..12e2b0a 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 528.54 ± 0.37 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.02 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 527.33 ± 1.41 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log index 2f24b71..fdf835b 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 93.71 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.11 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 270.71 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.87 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index af4c4a3..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 199.09 ± 3.44 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.73 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log index 2e31fbe..e164e47 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 469.22 ± 0.41 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.30 ± 0.54 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..f9c5383 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 167.49 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.71 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 0f58452..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 92.10 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log index 22e7612..fef7449 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 524.21 ± 1.37 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.01 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 525.67 ± 0.68 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..d70f1e9 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 169.41 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.71 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index e5b9e9f..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 93.57 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.42 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log index dab5c8e..08ada47 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 472.47 ± 0.58 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 472.62 ± 0.27 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..0f58777 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 246.86 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.86 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 1676f6f..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.47 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log index 0f8145c..59b9670 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 530.73 ± 0.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.00 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 530.96 ± 0.63 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.00 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..63ed4a7 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 285.15 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.86 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 09322c1..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 204.11 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.72 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index a728a7e..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 463.62 ± 0.34 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index bc3fd22..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 113.46 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 96cab07..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 522.69 ± 0.87 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index ef863b6..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.46 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.43 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..cd81a2b --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 463.74 ± 0.73 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..79e38a4 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 193.84 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.75 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log index 906acce..3ec22c1 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 513.70 ± 0.55 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.01 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 522.71 ± 0.55 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..b029e97 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 205.34 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log similarity index 62% rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log index 8439859..4d7ac4f 100644 --- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 271.67 ± 1.52 | -| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 12.13 ± 0.05 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.55 ± 0.52 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..1566960 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 234.18 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..d9990e7 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 526.17 ± 0.74 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..b854410 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 269.04 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log deleted file mode 100644 index cc3c3bf..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 434.79 ± 46.53 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 71c3bde..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 179.10 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 355f439..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 524.39 ± 1.39 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 2a48b32..0000000 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 195.60 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log index 687bbbd..8554509 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 463.45 ± 0.58 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 462.78 ± 0.04 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log index 849f667..646709f 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 115.59 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.44 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 194.64 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log index 2d69680..a04ab5b 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 499.44 ± 0.09 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.88 ± 0.71 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index bbcd196..5e20aed 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 177.69 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 195.11 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log index 6b32ee7..7e00b6d 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 470.06 ± 0.56 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 474.02 ± 0.22 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log index 0bcabf8..176bc97 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 198.39 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.74 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 308.72 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.85 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log index 3d27335..8372beb 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 501.79 ± 0.45 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 4.04 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 506.00 ± 0.53 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.04 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log index 5824c19..ca30c47 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 111.71 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.44 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 320.96 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 3.89 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index 3b0ee1d..7efd9cc 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -5,4 +5,4 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1) +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log similarity index 88% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log index 1586d96..fb6fd18 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log @@ -5,4 +5,4 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: ggml_vulkan: Device memory allocation of size 2819260416 failed. ggml_vulkan: Requested buffer size exceeds device buffer size limit: ErrorOutOfDeviceMemory main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf' -✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1__longctx32768 failed (exit 1) +✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002__fa1 __longctx16384 failed (exit 0) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log index b0e18ba..6df7e0c 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 106.82 ± 1.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.92 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 107.99 ± 1.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 3.93 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log index 0fa299f..8d0bace 100644 --- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 62.49 ± 0.00 | -| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 3.63 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 67.01 ± 0.00 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 3.76 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log index ca2c81f..af8f3fc 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2245.25 ± 4.85 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 75.82 ± 8.64 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2767.54 ± 1.34 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.17 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log index 431efbd..825bba8 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2224.91 ± 1.45 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 75.58 ± 9.31 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1439.14 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.99 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log index 4380d71..b5d1673 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2111.57 ± 5.75 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.04 ± 0.01 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2762.69 ± 4.25 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.08 ± 0.02 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index b15abfe..c143312 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1239.19 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.92 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1442.24 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.87 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6120e6e..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1190.60 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.04 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log index 6eb525c..6e0df52 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2256.38 ± 8.98 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 84.67 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2759.74 ± 13.26 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.86 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log index efc4431..c304f88 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1185.65 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.89 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1331.68 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 71.38 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 7e3987d..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1206.03 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.51 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log index 593579b..fb3a67c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2154.09 ± 4.72 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.41 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2748.02 ± 15.88 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.54 ± 0.01 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log index 201e592..54a7f4d 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1224.83 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.09 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1341.74 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 71.34 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index afca8d4..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1291.24 ± 6.88 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 59.61 ± 0.03 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log index 4e866ac..73eee2d 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2283.48 ± 2.94 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 78.74 ± 0.13 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2907.52 ± 4.15 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.61 ± 0.04 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..26bd649 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1365.96 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.95 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 9d4bdf7..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 898.63 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.15 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log index 1940700..069c775 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2154.45 ± 10.83 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 76.62 ± 3.81 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2902.86 ± 2.84 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..93d8e93 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1384.06 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.93 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9744908..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 855.04 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 57.04 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log index 3f0a925..8d4790f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2276.80 ± 11.52 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 82.07 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2868.25 ± 16.39 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 80.93 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..bc9a6e4 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1413.39 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.93 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index bfc5d28..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1497.53 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.57 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log index 01c1a9e..3d1a62f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2161.24 ± 6.51 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 82.35 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.90 ± 17.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.07 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..f3ad530 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1414.92 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.13 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 3805856..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1440.10 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.38 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 57b3971..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2269.02 ± 4.71 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 59.93 ± 6.59 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index e02d30a..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1031.65 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.77 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3020474..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2141.35 ± 2.64 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 64.63 ± 11.41 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 99cf514..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1002.59 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.91 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log similarity index 79% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log index ce8e80c..bd61453 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2106.39 ± 2.40 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 84.35 ± 0.02 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2812.03 ± 15.70 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.02 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..f15de69 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1347.24 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.93 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..19e21d1 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2820.50 ± 10.20 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.66 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..45cfba0 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1390.56 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.23 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log new file mode 100644 index 0000000..ae8df7d --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2853.13 ± 21.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.93 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..e604ec0 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1368.50 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 68.97 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..88706b8 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2846.23 ± 16.40 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.96 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..7f6b35d --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1361.15 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.99 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log deleted file mode 100644 index fdf8139..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2261.65 ± 12.88 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 62.69 ± 7.43 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index d8a31db..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1160.50 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.25 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log deleted file mode 100644 index ec3a243..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2171.00 ± 3.48 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 65.68 ± 10.35 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 2c61af5..0000000 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1240.47 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.34 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log index 617a0a7..988efe2 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2257.61 ± 5.08 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 78.84 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2843.92 ± 0.49 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.68 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log index d5495a2..0c8d637 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1033.75 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.84 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1377.32 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 66.63 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log index ed4dea8..0d50d4b 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2102.34 ± 8.12 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 67.40 ± 10.30 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2841.34 ± 6.05 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 78.55 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index aa94541..585ce4f 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1168.34 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.08 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1340.85 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.21 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log index 45c07a2..edb6f5d 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2275.52 ± 10.03 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.45 ± 10.56 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2888.98 ± 3.97 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.99 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log index ccee19b..d5bf76c 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1227.13 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 58.33 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1300.12 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 65.93 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log index 0bbe924..6d438b1 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 2114.70 ± 2.89 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 68.07 ± 12.18 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2874.43 ± 2.44 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 81.99 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log index 38a595f..be4afd5 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 1046.30 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.97 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 1332.98 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 67.59 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log index a0b24fe..b62a85a 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1229.75 ± 236.47 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 75.94 ± 2.23 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1798.72 ± 4.50 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 90.73 ± 0.09 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log index b0d1ee5..24bb270 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 145.82 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 64.34 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 466.89 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 74.73 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log index eab1c8f..a154a01 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1116.46 ± 204.92 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 78.27 ± 2.29 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1633.15 ± 4.31 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 85.91 ± 0.19 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log index 4e79c73..42fcd8d 100644 --- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 646.29 ± 0.00 | -| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 45.91 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 776.82 ± 0.00 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 58.76 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log deleted file mode 100644 index 03aa189..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 669.82 ± 4.74 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.40 ± 0.01 | - -build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index b5827e0..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 325.08 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.40 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log deleted file mode 100644 index 485f1c6..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 673.38 ± 9.06 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.47 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index bf9bed5..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 332.86 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.37 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 9ec933d..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 656.31 ± 30.59 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.35 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index ac7ca98..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.34 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.27 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3ef62f3..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 680.04 ± 3.32 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 37.34 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 2c76d4b..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.06 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.92 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log deleted file mode 100644 index 6bb24dd..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 667.33 ± 4.16 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.43 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 140ae37..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 262.90 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.21 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 921057f..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.42 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.24 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index 6c5e115..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 778.24 ± 5.71 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.19 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index c7414f8..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 301.71 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.25 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index ed939fb..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 783.56 ± 11.04 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.18 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 456bcc6..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 276.51 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.58 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log deleted file mode 100644 index cc0886f..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 779.12 ± 1.84 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 36.55 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 343c160..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 335.09 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.90 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index 68f6644..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 774.77 ± 13.26 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.20 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7189d80..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 454.32 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 30.39 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 4e8eea8..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 769.93 ± 0.42 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.36 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 35d9468..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.72 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.26 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 5cdbd11..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 771.12 ± 3.66 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.39 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 9dbd7d0..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.41 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.75 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log deleted file mode 100644 index e205824..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 757.60 ± 0.73 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.06 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 6ad4e1b..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 270.78 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.89 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log deleted file mode 100644 index c6b089a..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 763.20 ± 10.81 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.04 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index b01e2f2..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 283.58 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.90 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 199e45f..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 775.86 ± 2.82 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.40 ± 0.01 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 1e40ed9..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 225.21 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.26 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index e8d3b53..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 776.83 ± 4.02 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.34 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 50ebc2a..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 223.16 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 13.23 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log deleted file mode 100644 index 6407b06..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 750.63 ± 5.94 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.04 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index dcb3220..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 281.34 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.87 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index ee52230..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 756.08 ± 9.81 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 37.06 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index baf0f15..0000000 --- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.00 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 29.76 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log deleted file mode 100644 index 3c5b3a7..0000000 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 720.94 ± 1.15 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 35.76 ± 0.02 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index ad679ba..0000000 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 166.61 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 25.02 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log deleted file mode 100644 index 1977efa..0000000 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 513.71 ± 2.70 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 34.86 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 010091f..0000000 --- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 157.10 ± 0.00 | -| gpt-oss 120B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.67 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log index 4a5e683..2252fb8 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 665.60 ± 9.61 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.94 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.17 ± 2.38 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.85 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log index e396f32..a29ee58 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 673.11 ± 6.92 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 52.11 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.64 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.04 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log index 4de5343..ee65108 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.77 ± 57.43 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.85 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 276.82 ± 4.42 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.95 ± 0.01 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 971cbac..fdb5447 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 331.82 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.00 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 303.82 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 29.04 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 448a686..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.50 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.84 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log index 90b7ef8..2d33a3b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 652.18 ± 8.35 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 52.02 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.29 ± 0.47 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.18 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log index ab47309..48bc097 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 326.59 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 27.34 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 301.57 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.81 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 8dfe9e9..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 254.16 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.85 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log index 4858f77..b27b11c 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.18 ± 7.22 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.14 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 274.47 ± 1.46 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.23 ± 0.01 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log index 1d374b5..46fc918 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 224.65 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.73 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 289.74 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.82 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index f1126a0..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 336.48 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.80 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log index 7d32aad..7a833ac 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 767.82 ± 6.23 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.71 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 657.63 ± 7.64 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.65 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..5159dd1 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 479.02 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.15 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index ff08d39..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 301.90 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.75 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log index a21151f..9ed737f 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 782.34 ± 9.39 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.76 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 649.55 ± 10.69 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.68 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..b3e1604 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 480.59 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.88 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 85d2d56..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 293.43 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.44 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log index f529ee6..303a0c0 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 762.33 ± 0.82 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.67 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 654.41 ± 2.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.37 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..a44f435 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 470.66 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.00 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 3c1ffc5..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 341.35 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.61 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log index 7b25ca7..aac1650 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 778.37 ± 3.31 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.63 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 658.64 ± 9.76 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.93 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..4d3497f --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 472.41 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 45.26 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index d7ed569..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 358.69 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 39.51 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 5cfe906..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 755.98 ± 7.49 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.78 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 44f2fde..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.50 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.71 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index 824e9db..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 766.72 ± 15.10 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.72 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1b571bc..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 225.80 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.74 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..dfe030e --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.48 ± 24.73 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.66 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..9ce2672 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 412.07 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.91 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log similarity index 79% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log index 8bbdcf1..a164176 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 675.94 ± 0.23 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 52.09 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.60 ± 10.65 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.79 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..315116f --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 409.77 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.99 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log new file mode 100644 index 0000000..79ce413 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 639.37 ± 5.37 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.13 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..6d3d42a --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 496.89 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.73 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..0a274b3 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 646.49 ± 8.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.16 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..81ff7b2 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 501.87 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.01 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log deleted file mode 100644 index 4090438..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 742.07 ± 2.29 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.23 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 65eff65..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 263.78 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.36 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log deleted file mode 100644 index fe72ff6..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 763.92 ± 4.22 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.34 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index a0a4c62..0000000 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 367.14 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.34 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log index b87dc76..30be698 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 753.49 ± 1.04 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.76 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 645.91 ± 13.82 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.67 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log index bf10e38..cc45722 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.01 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.73 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 410.91 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.97 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log index 3f9ccaa..ec1ce05 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 774.40 ± 6.21 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.74 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 653.08 ± 2.58 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.78 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index b80557c..43778f2 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 274.48 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.72 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 413.56 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 28.99 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log index eb7d365..8113168 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 746.02 ± 1.26 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.20 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 637.84 ± 10.76 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.22 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log index 0959919..9c34c8d 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 284.03 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.23 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 522.65 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 43.99 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log index bb466e7..a93d1d4 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 752.08 ± 9.39 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.38 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 642.63 ± 7.27 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.17 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log index 4f67e15..4db2edc 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 226.23 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.45 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 532.69 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 44.03 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log index 66d71b9..a1dd94b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 759.84 ± 2.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.66 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 792.77 ± 0.78 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 52.34 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log index 08a9873..123da16 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 169.89 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 32.63 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 266.30 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 39.24 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log index 6a55bcb..9ffcf99 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 534.51 ± 0.52 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 54.67 ± 0.10 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 543.66 ± 0.88 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 56.37 ± 0.04 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log index e6a3286..3dc745b 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 159.72 ± 0.00 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 37.31 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 299.40 ± 0.00 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 45.88 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log deleted file mode 100644 index 2a24d43..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1430.02 ± 3.53 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.38 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log deleted file mode 100644 index e8f169c..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 551.38 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.35 ± 0.00 | - -build: 31df4608 (7038) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log deleted file mode 100644 index e6da731..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 555.09 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.31 ± 0.00 | - -build: 12bb5c37 (7074) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log deleted file mode 100644 index 81766fe..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1494.10 ± 6.72 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.26 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log deleted file mode 100644 index b3e8085..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 357.87 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.88 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 19c4a09..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 357.94 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.86 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log deleted file mode 100644 index 1e7e8ae..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1430.88 ± 12.04 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.37 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 991dcb5..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 419.15 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.30 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log deleted file mode 100644 index 7c7b5b2..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1399.32 ± 6.94 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 28.35 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index a04a115..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 556.09 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 24.24 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log deleted file mode 100644 index dfc5846..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1500.80 ± 17.51 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.24 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index a0525bf..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 479.16 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.87 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log deleted file mode 100644 index 6fbbc9a..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1440.00 ± 14.14 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.28 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index d615a03..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 478.62 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.86 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log deleted file mode 100644 index 2be1bd7..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1449.04 ± 10.94 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 27.86 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 90b82e9..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.30 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.38 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log deleted file mode 100644 index afe2728..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1444.02 ± 15.40 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.40 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 36b8745..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 612.85 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.39 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 149dafc..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1499.09 ± 21.84 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.40 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index a7f08c1..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 350.48 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index fb29283..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1448.72 ± 23.17 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.37 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 95a1696..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 352.46 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.92 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log deleted file mode 100644 index 061048d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1437.87 ± 9.12 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.25 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index fb8cd0f..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 443.64 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.05 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log deleted file mode 100644 index df945d7..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1405.44 ± 19.51 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.27 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 658f7c8..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 454.92 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.10 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log deleted file mode 100644 index 0af68bb..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1486.29 ± 11.42 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.39 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log deleted file mode 100644 index e91a628..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 352.25 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.92 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log deleted file mode 100644 index 3e93b8a..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1426.84 ± 4.41 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.36 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1a33f1d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 351.69 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 17.91 ± 0.00 | - -build: bca95ca51 (7036) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log deleted file mode 100644 index 2500f7d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1424.62 ± 5.37 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.27 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log deleted file mode 100644 index 5a63e90..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 446.44 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.05 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log deleted file mode 100644 index 4fd877d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1380.59 ± 26.70 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 28.29 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0fa0af0..0000000 --- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 465.50 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 25.06 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log deleted file mode 100644 index c7b493d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 574.29 ± 4.39 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 17.78 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log deleted file mode 100644 index 2a70ab4..0000000 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 221.72 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 15.61 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log deleted file mode 100644 index 7c46f4c..0000000 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 448.90 ± 3.43 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 16.15 ± 0.01 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log deleted file mode 100644 index 3d43f7d..0000000 --- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_vulkan: Found 1 Vulkan devices: -ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 243.39 ± 0.00 | -| gpt-oss 20B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.76 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log index 1780e19..4326475 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1384.08 ± 13.61 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.16 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 791.20 ± 5.92 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.24 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log index 7d852e7..381be35 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1333.81 ± 9.84 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.64 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 539.70 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.26 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log index fcfb2c8..fc16f14 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1419.39 ± 12.17 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.24 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 795.34 ± 8.45 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.28 ± 0.01 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 64d7e12..7c903ac 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 537.64 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.44 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 538.62 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.21 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 0696210..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 353.60 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.87 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log index a2bda58..093a57c 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1326.80 ± 18.91 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.50 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 788.07 ± 19.38 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.65 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log index fa44d9e..5e1b2bb 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 544.55 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.36 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 585.34 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 61.13 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index d8cbd0b..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 398.06 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.13 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log index bd543ef..59cb88e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1365.89 ± 19.13 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.49 ± 0.01 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 777.22 ± 12.74 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.68 ± 0.00 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log index 0b1381a..45a310e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.88 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.88 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 533.90 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 61.16 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index cefe08d..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 456.33 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.03 ± 0.00 | - -build: 4fc43d43d (7085) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log index 69d55ba..5486142 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1510.54 ± 2.64 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.95 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1752.24 ± 12.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.82 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..5552e25 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 796.69 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.10 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index d55b6c5..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 474.83 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.83 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log index ebd62ee..86bf607 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1598.15 ± 4.85 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.02 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1717.36 ± 12.37 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.94 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..6dcbab1 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 796.57 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.08 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index bbce369..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 477.40 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.86 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log index 554365c..62ba657 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1510.09 ± 10.37 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.09 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.01 ± 23.22 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.48 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..ae120ce --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 808.14 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 64.31 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 0245ac8..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 518.94 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.02 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log index 43e084b..e64cdc4 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1572.54 ± 11.37 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.96 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1726.91 ± 4.81 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.44 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..e298cff --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 817.70 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 64.37 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 730bd47..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 554.20 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 56.02 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index 454f08b..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1511.00 ± 19.49 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.04 ± 0.03 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index e68f1b7..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.98 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.89 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index c2b5d5e..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1559.27 ± 17.65 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.16 ± 0.03 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 1c3d423..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.08 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.83 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log new file mode 100644 index 0000000..7ff138e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1710.28 ± 7.42 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.12 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..e60eec1 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 670.09 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.17 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..29b33c0 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1695.75 ± 25.43 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.15 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..556c411 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 668.65 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.04 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log similarity index 79% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log index a264ea3..1530264 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1385.54 ± 24.93 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.72 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1670.49 ± 30.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.45 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..e0cabe3 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 658.07 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.69 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..00e6a9e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1661.92 ± 6.16 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.41 ± 0.02 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..fd78ca6 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 654.36 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.59 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log deleted file mode 100644 index c25742d..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1476.67 ± 4.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.53 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index 2d5b65d..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 418.15 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.47 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 5940589..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1526.60 ± 21.01 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.45 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 7545939..0000000 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 431.87 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.67 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log index 5841cfd..2763926 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1507.89 ± 10.23 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.07 ± 0.04 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1692.63 ± 8.33 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.09 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log index c996efd..a63519e 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 349.21 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.87 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 668.58 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.12 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log index 3467d3e..49df202 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1558.14 ± 14.95 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 73.06 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1721.79 ± 15.21 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 73.01 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index e36709a..1b523b7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 422.57 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.49 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 665.87 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 42.13 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log index 3e50229..fd42fa3 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1463.05 ± 15.58 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.34 ± 0.03 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1668.78 ± 30.58 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.33 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log index ad72e1c..6c3bf6a 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 435.15 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 54.40 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 633.71 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.64 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log index 7033622..67ac385 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1513.62 ± 5.84 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 72.47 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1674.84 ± 21.95 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.29 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log index f94ac8b..fc067d5 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 345.40 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 28.82 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 644.71 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 62.66 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log index a7d392a..eff8190 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1681.86 ± 231.36 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.38 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1965.23 ± 21.66 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 75.24 ± 0.04 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log index c67d585..de60c57 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 300.31 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 46.98 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 512.34 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 56.91 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log index f6ceacb..297b848 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1354.58 ± 9.42 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 77.10 ± 0.22 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1395.08 ± 16.05 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 79.60 ± 0.03 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log index 4a9b8da..214c4a8 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 298.10 ± 0.00 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 52.75 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 467.70 ± 0.00 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 65.33 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log index 5dd9b43..71b27ca 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1117.58 ± 1.52 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.47 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1556.97 ± 0.78 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.65 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log index 733f960..8b6d29a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 47.68 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 94.98 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.03 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log index d4fe6c7..c8ebbe3 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1012.09 ± 1.56 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.61 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1561.25 ± 2.77 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.59 ± 0.02 | -build: 4db63cdde (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log index 4ba377e..c0ef94b 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1091.87 ± 1.16 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.02 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 94.41 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.04 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 8ef5a26..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 46.59 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | - -build: 4db63cdde (7085) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log index 9f57dab..32f721d 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1119.14 ± 0.89 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.51 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1572.74 ± 2.60 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.65 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log index 6ae1d5c..a5326b0 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 54.85 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 351.26 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.04 ± 0.00 | -build: 31df4608 (7038) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log deleted file mode 100644 index 809b014..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 167.07 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log index 82a2779..3bbc163 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1017.17 ± 2.70 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.53 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1572.88 ± 2.44 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.02 | -build: 4fc43d43d (7085) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log index e6aeba6..05bda17 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 53.92 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.58 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 346.35 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 10.03 ± 0.00 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log deleted file mode 100644 index 6d86c48..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,20 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -/opt/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu:89: ROCm error -/usr/local/lib64/libggml-base.so.0(+0x3565) [0x7f056f2a5565] -/usr/local/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f056f2a592b] -/usr/local/lib64/libggml-base.so.0(ggml_abort+0x11f) [0x7f056f2a5aaf] -/usr/local/lib64/libggml-hip.so.0(+0x28bcf12) [0x7f0571c1ef12] -/usr/local/lib64/libggml-hip.so.0(+0x28c1f64) [0x7f0571c23f64] -/usr/local/lib64/libggml-base.so.0(ggml_backend_sched_synchronize+0x2e) [0x7f056f2bc8ce] -/usr/local/lib64/libllama.so.0(_ZN13llama_context11synchronizeEv+0x10) [0x7f05722f1950] -/usr/local/bin/llama-bench() [0x40a3fc] -/usr/local/bin/llama-bench() [0x40816d] -/lib64/libc.so.6(+0x35b5) [0x7f056ec3b5b5] -/lib64/libc.so.6(__libc_start_main+0x88) [0x7f056ec3b668] -/usr/local/bin/llama-bench() [0x409255] -✖ ! [rocm-7alpha] llama-2-7b.Q4_0__hblt0__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log index b702a5c..8bea5c1 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1113.73 ± 1.45 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.09 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1525.39 ± 0.85 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.05 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..eb58868 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 104.91 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.08 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log deleted file mode 100644 index 9ad214d..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.04 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log index db329af..adc47d5 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1014.02 ± 2.02 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 50.96 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1524.22 ± 2.19 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..b41ed3a --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 106.82 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.07 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index fc72518..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 49.34 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log index 6f9bc79..8074b6d 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1116.12 ± 3.27 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.11 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1539.28 ± 0.84 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.02 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log new file mode 100644 index 0000000..5adfe23 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 384.07 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.06 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index 7ee8eea..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 186.52 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log index 2279b99..47a5836 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1021.92 ± 1.12 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.07 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1538.89 ± 3.35 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.07 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..52dabe0 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 382.87 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 12.07 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log deleted file mode 100644 index 45771d9..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 188.56 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | - -build: 1c398dc9e (7034) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log deleted file mode 100644 index afbf55d..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1100.61 ± 4.23 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.08 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log deleted file mode 100644 index a11d43b..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.66 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log deleted file mode 100644 index f5034a4..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1007.54 ± 4.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.04 ± 0.01 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log deleted file mode 100644 index 59dc74e..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 70.57 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log similarity index 78% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log index 411b668..e534712 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1008.52 ± 2.07 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.29 ± 0.02 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1492.67 ± 1.40 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.89 ± 0.01 | -build: 12bb5c37 (7074) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log new file mode 100644 index 0000000..be3abc4 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 141.66 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.37 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log new file mode 100644 index 0000000..d3c244e --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1495.58 ± 2.18 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..f7d0ab4 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 141.05 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log new file mode 100644 index 0000000..0d4f6c6 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1526.32 ± 2.10 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.96 ± 0.03 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log new file mode 100644 index 0000000..a4690c7 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 348.77 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.34 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log new file mode 100644 index 0000000..5226879 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.07 ± 0.42 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 51.01 ± 0.01 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log new file mode 100644 index 0000000..e50b6d7 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log @@ -0,0 +1,10 @@ +ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no +ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 348.73 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.36 ± 0.00 | + +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log deleted file mode 100644 index 5474b5f..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1107.98 ± 1.17 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.05 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log deleted file mode 100644 index b7f4180..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 171.20 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log deleted file mode 100644 index 81a14b9..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1015.07 ± 2.17 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.00 ± 0.02 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log deleted file mode 100644 index 2d8527e..0000000 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log +++ /dev/null @@ -1,10 +0,0 @@ -ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no -ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no -ggml_cuda_init: found 1 ROCm devices: - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 174.55 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | - -build: ee8dd5c65 (7035) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log index 23e50c3..af74035 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1100.81 ± 1.25 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.00 ± 0.00 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1493.41 ± 1.54 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.90 ± 0.02 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log index 4eafa4f..7919346 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 71.68 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.87 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log index 336c571..a461503 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 963.09 ± 2.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.02 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1494.79 ± 2.85 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.91 ± 0.01 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log index d1c28ea..34ebd8c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 177.96 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.92 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 139.47 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.36 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log index 6ab6bf8..9935294 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 1113.49 ± 3.85 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.02 ± 0.02 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1529.76 ± 1.36 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.85 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log index ebaa5b3..5c0b82d 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 173.34 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 350.95 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log index a879978..a8d33d1 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log @@ -2,9 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp512 | 968.65 ± 2.86 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg128 | 51.01 ± 0.01 | +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1530.14 ± 1.62 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.88 ± 0.01 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log similarity index 76% rename from benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log index c05fd49..fda552c 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log @@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices: Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 69.72 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.91 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d16384 | 347.70 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d16384 | 13.35 ± 0.00 | -build: bca95ca51 (7036) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log index 63adc8b..8493016 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1378.42 ± 1.37 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.41 ± 0.06 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1494.56 ± 4.36 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 56.03 ± 0.06 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log similarity index 80% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log index b66aa79..6f58f25 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 101.79 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 9.25 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 196.22 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 15.95 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log index cd66204..13624db 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1094.33 ± 3.06 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 53.65 ± 0.01 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp512 | 1135.49 ± 4.16 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg128 | 55.73 ± 0.02 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log similarity index 80% rename from benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log index 4578395..549cd93 100644 --- a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 174.60 ± 0.00 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 8.70 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d16384 | 294.01 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d16384 | 15.16 ± 0.00 | -build: 1c398dc9e (7034) +build: 2aa45ef9e (7423) diff --git a/docs/results.json b/docs/results.json index e250afa..7d9c8f5 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,27 +1,15 @@ { "meta": { - "generated_at": "2025-12-05T08:29:05Z", + "generated_at": "2025-12-21T18:48:48Z", "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)", "llamacpp_builds": [ - { - "hash": "03d9a77b8", - "number": "7278" - }, - { - "hash": "0a3857fe0", - "number": "7089" - }, { "hash": "12bb5c37", "number": "7074" }, { - "hash": "1c398dc9e", - "number": "7034" - }, - { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" }, { "hash": "4db63cdde", @@ -39,26 +27,14 @@ "hash": "86f1f4411", "number": "7085" }, - { - "hash": "ab5783eb4", - "number": "7089" - }, { "hash": "b447a9a4b", "number": "7085" }, - { - "hash": "bca95ca51", - "number": "7036" - }, { "hash": "caca0d55c", "number": "7085" }, - { - "hash": "ee8dd5c65", - "number": "7035" - }, { "hash": "f1840a25d", "number": "7085" @@ -79,10 +55,14 @@ "rocm6_4_4-hblt0", "rocm6_4_4-rocwmma", "rocm6_4_4-rocwmma-hblt0", - "rocm7.1", - "rocm7.1-hblt0", - "rocm7.1-rocwmma", - "rocm7.1-rocwmma-hblt0", + "rocm7.1.1", + "rocm7.1.1-hblt0", + "rocm7.1.1-rocwmma", + "rocm7.1.1-rocwmma-hblt0", + "rocm7_1", + "rocm7_1-hblt0", + "rocm7_1-rocwmma", + "rocm7_1-rocwmma-hblt0", "rocm7_rc", "rocm7_rc-hblt0", "rocm7_rc-rocwmma", @@ -93,168 +73,6 @@ "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" }, "runs": [ - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 103.27, - "tps_std": 0.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 19.09, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 167.68, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.85, - "tps_std": 0.0, - "error": true, - "error_type": "runtime", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", @@ -265,8 +83,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 87.62, - "tps_std": 0.29, + "tps_mean": 247.81, + "tps_std": 0.75, "error": false, "error_type": null, "backend": "ROCm", @@ -279,8 +97,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -293,8 +111,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.57, - "tps_std": 0.0, + "tps_mean": 22.45, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -307,8 +125,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -318,13 +136,13 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 13.99, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 37.61, "tps_std": 0.0, - "error": true, - "error_type": "hang", + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -332,9 +150,40 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -346,8 +195,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 170.65, - "tps_std": 0.11, + "tps_mean": 246.64, + "tps_std": 0.87, "error": false, "error_type": null, "backend": "ROCm", @@ -360,8 +209,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -374,7 +223,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.54, + "tps_mean": 22.63, "tps_std": 0.0, "error": false, "error_type": null, @@ -388,8 +237,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -399,13 +248,13 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 14.57, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 37.54, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "runtime", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -413,40 +262,9 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -458,8 +276,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 93.53, - "tps_std": 0.08, + "tps_mean": 250.33, + "tps_std": 0.67, "error": false, "error_type": null, "backend": "ROCm", @@ -472,8 +290,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -486,7 +304,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.66, + "tps_mean": 22.7, "tps_std": 0.0, "error": false, "error_type": null, @@ -500,8 +318,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -511,21 +329,21 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 84.5, + "tps_std": 0.0, "error": true, "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -539,8 +357,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 171.42, - "tps_std": 0.59, + "tps_mean": 250.13, + "tps_std": 0.62, "error": false, "error_type": null, "backend": "ROCm", @@ -553,8 +371,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -567,7 +385,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.69, + "tps_mean": 22.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -581,8 +399,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -592,8 +410,8 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -606,7 +424,7 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -620,8 +438,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 135.1, - "tps_std": 0.15, + "tps_mean": 330.74, + "tps_std": 2.03, "error": false, "error_type": null, "backend": "ROCm", @@ -634,8 +452,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -648,7 +466,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.72, + "tps_mean": 21.74, "tps_std": 0.0, "error": false, "error_type": null, @@ -662,8 +480,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -673,10 +491,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 12.41, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 33.8, "tps_std": 0.0, "error": true, "error_type": "hang", @@ -687,7 +505,7 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -701,8 +519,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 142.39, - "tps_std": 0.25, + "tps_mean": 330.13, + "tps_std": 0.85, "error": false, "error_type": null, "backend": "ROCm", @@ -715,8 +533,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -729,8 +547,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.7, - "tps_std": 0.02, + "tps_mean": 21.73, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -743,8 +561,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -754,202 +572,9 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 13.3, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 135.5, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 30.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 172.61, - "tps_std": 0.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", "tps_mean": 33.91, "tps_std": 0.0, "error": true, @@ -961,22 +586,246 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 333.45, + "tps_std": 1.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 98.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 336.2, + "tps_std": 2.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 98.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.88, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 77.55, - "tps_std": 0.6, + "tps_mean": 323.36, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -986,24 +835,24 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.7, + "tps_mean": 21.68, "tps_std": 0.0, "error": false, "error_type": null, @@ -1014,105 +863,24 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.62, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.5, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.23, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 47.07, "tps_std": 0.0, "error": true, "error_type": "runtime", @@ -1123,22 +891,22 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 99.42, - "tps_std": 0.19, + "tps_mean": 323.91, + "tps_std": 1.1, "error": false, "error_type": null, "backend": "ROCm", @@ -1148,24 +916,24 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.7, + "tps_mean": 21.68, "tps_std": 0.0, "error": false, "error_type": null, @@ -1176,251 +944,27 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 29.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 172.08, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 72.38, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.73, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.4, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 46.62, "tps_std": 0.0, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -1428,22 +972,22 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 131.85, - "tps_std": 0.23, + "tps_mean": 330.9, + "tps_std": 1.42, "error": false, "error_type": null, "backend": "ROCm", @@ -1453,78 +997,25 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.71, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.15, + "tps_mean": 21.83, "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 99.25, - "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", @@ -1534,50 +1025,22 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -1590,7 +1053,331 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 329.23, + "tps_std": 1.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 323.77, + "tps_std": 1.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 46.38, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 323.19, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 46.51, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 330.87, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -1604,8 +1391,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 169.93, - "tps_std": 0.33, + "tps_mean": 330.19, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", @@ -1618,8 +1405,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -1632,7 +1419,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.71, + "tps_mean": 21.82, "tps_std": 0.0, "error": false, "error_type": null, @@ -1646,8 +1433,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -1657,54 +1444,23 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } + "build": null }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", @@ -1716,8 +1472,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 217.91, - "tps_std": 0.48, + "tps_mean": 228.89, + "tps_std": 0.52, "error": false, "error_type": null, "backend": "Vulkan", @@ -1730,8 +1486,8 @@ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -1744,119 +1500,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 24.5, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 235.07, - "tps_std": 0.58, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 68.01, - "name_params_b": 110.47, - "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", - "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.84, + "tps_mean": 24.48, "tps_std": 0.01, "error": false, "error_type": null, @@ -1867,11 +1511,95 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 40.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 9.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 243.57, + "tps_std": 0.43, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -1881,10 +1609,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.43, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 24.54, "tps_std": 0.0, "error": false, "error_type": null, @@ -1895,11 +1623,11 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -1909,10 +1637,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.49, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 52.62, "tps_std": 0.0, "error": false, "error_type": null, @@ -1923,25 +1651,53 @@ "file_size_gib": 68.01, "name_params_b": 110.47, "quant": "Q4_K_XL", - "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 14.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 92.38, - "tps_std": 0.37, + "tps_mean": 194.43, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -1951,19 +1707,100 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 36.61, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 195.23, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, @@ -1979,288 +1816,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 19.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 147.75, - "tps_std": 0.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 86.5, - "tps_std": 0.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 14.06, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 140.67, - "tps_std": 0.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2270,10 +1830,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 36.83, "tps_std": 0.0, "error": false, "error_type": null, @@ -2284,11 +1844,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2298,10 +1858,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 14.6, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.4, "tps_std": 0.0, "error": false, "error_type": null, @@ -2312,39 +1872,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2357,8 +1889,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 74.73, - "tps_std": 0.27, + "tps_mean": 195.45, + "tps_std": 0.65, "error": false, "error_type": null, "backend": "ROCm", @@ -2371,8 +1903,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2385,8 +1917,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.66, - "tps_std": 0.0, + "tps_mean": 16.5, + "tps_std": 0.31, "error": false, "error_type": null, "backend": "ROCm", @@ -2399,8 +1931,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2410,125 +1942,13 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 26.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 151.03, - "tps_std": 0.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, - "error": true, - "error_type": "runtime", + "error": false, + "error_type": null, "backend": null, "ngl": null, "mmap": null, @@ -2536,183 +1956,21 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 125.43, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.48, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 140.41, - "tps_std": 0.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.52, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 13.2, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 130.63, + "tps_mean": 195.71, "tps_std": 0.7, "error": false, "error_type": null, @@ -2723,24 +1981,24 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.44, + "tps_mean": 16.69, "tps_std": 0.0, "error": false, "error_type": null, @@ -2751,81 +2009,50 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 26.75, - "tps_std": 0.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } + "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 145.79, - "tps_std": 0.11, + "tps_mean": 275.04, + "tps_std": 0.75, "error": false, "error_type": null, "backend": "ROCm", @@ -2835,25 +2062,271 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "hblt0", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", "tps_mean": 16.57, - "tps_std": 0.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 33.7, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 272.75, + "tps_std": 1.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 33.85, + "tps_std": 0.0, + "error": true, + "error_type": "hang", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 277.38, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 92.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 277.33, + "tps_std": 0.75, "error": false, "error_type": null, "backend": "ROCm", @@ -2866,8 +2339,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -2877,117 +2350,11 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.65, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 69.31, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.5, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.07, + "tps_mean": 16.62, "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 136.65, - "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -2997,407 +2364,24 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.46, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.05, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 94.32, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.25, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 130.72, - "tps_std": 0.76, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.54, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 114.56, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 159.14, - "tps_std": 0.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.44, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, - "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", - "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 92.73, "tps_std": 0.0, "error": true, "error_type": "hang", @@ -3408,22 +2392,22 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 96.45, - "tps_std": 0.26, + "tps_mean": 254.32, + "tps_std": 0.84, "error": false, "error_type": null, "backend": "ROCm", @@ -3433,19 +2417,19 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, @@ -3461,22 +2445,184 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 46.17, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 253.04, + "tps_std": 1.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 46.53, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 257.7, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -3489,22 +2635,22 @@ "file_size_gib": null, "name_params_b": null, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 130.86, - "tps_std": 0.36, + "tps_mean": 259.4, + "tps_std": 0.46, "error": false, "error_type": null, "backend": "ROCm", @@ -3514,25 +2660,187 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", + "tps_mean": 16.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 254.22, + "tps_std": 1.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 45.9, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 253.25, + "tps_std": 1.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", "tps_mean": 16.53, - "tps_std": 0.06, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -3542,11 +2850,148 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 45.93, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 258.89, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 79.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3556,21 +3001,46 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 36.62, - "tps_std": 0.0, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 110.47, - "file_size_gib": 94.57, - "name_params_b": 110.47, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -3584,8 +3054,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 261.54, - "tps_std": 1.01, + "tps_mean": 279.25, + "tps_std": 0.28, "error": false, "error_type": null, "backend": "Vulkan", @@ -3598,8 +3068,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3612,8 +3082,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 17.42, - "tps_std": 0.0, + "tps_mean": 17.61, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -3626,8 +3096,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3637,10 +3107,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 23.19, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 42.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -3651,11 +3121,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3665,10 +3135,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.25, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -3679,11 +3149,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3696,8 +3166,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 233.87, - "tps_std": 0.08, + "tps_mean": 244.36, + "tps_std": 0.45, "error": false, "error_type": null, "backend": "Vulkan", @@ -3710,8 +3180,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3724,7 +3194,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 17.74, + "tps_mean": 17.73, "tps_std": 0.01, "error": false, "error_type": null, @@ -3738,8 +3208,8 @@ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3749,10 +3219,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.31, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 54.92, "tps_std": 0.0, "error": false, "error_type": null, @@ -3763,11 +3233,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -3777,10 +3247,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.62, "tps_std": 0.0, "error": false, "error_type": null, @@ -3791,235 +3261,11 @@ "file_size_gib": 94.57, "name_params_b": 110.47, "quant": "Q6_K_XL", - "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 103.85, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 19.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 101.51, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 19.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4032,8 +3278,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 103.65, - "tps_std": 0.07, + "tps_mean": 65.74, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -4046,8 +3292,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4074,8 +3320,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4085,10 +3331,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 12.76, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 23.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -4099,11 +3345,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4113,10 +3359,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 0.8, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -4127,11 +3373,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4144,8 +3390,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 103.07, - "tps_std": 0.08, + "tps_mean": 65.41, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -4158,8 +3404,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4172,7 +3418,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.78, + "tps_mean": 2.79, "tps_std": 0.0, "error": false, "error_type": null, @@ -4186,8 +3432,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4197,13 +3443,13 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 12.71, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 24.05, "tps_std": 0.0, - "error": true, - "error_type": "runtime", + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -4211,9 +3457,40 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -4225,8 +3502,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 105.64, - "tps_std": 0.24, + "tps_mean": 65.85, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -4239,8 +3516,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4267,8 +3544,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4278,10 +3555,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 33.13, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 37.47, "tps_std": 0.0, "error": false, "error_type": null, @@ -4292,11 +3569,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4306,10 +3583,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.31, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.61, "tps_std": 0.0, "error": false, "error_type": null, @@ -4320,11 +3597,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4337,8 +3614,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 102.84, - "tps_std": 0.31, + "tps_mean": 65.38, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4351,8 +3628,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4379,8 +3656,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4390,10 +3667,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.86, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 37.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -4404,11 +3681,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4418,10 +3695,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.28, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.51, "tps_std": 0.0, "error": false, "error_type": null, @@ -4432,11 +3709,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4449,8 +3726,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 102.56, - "tps_std": 0.06, + "tps_mean": 145.84, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -4463,8 +3740,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4491,8 +3768,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4502,13 +3779,13 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 11.74, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 26.43, "tps_std": 0.0, - "error": true, - "error_type": "hang", + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -4516,9 +3793,40 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -4530,8 +3838,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 103.23, - "tps_std": 0.06, + "tps_mean": 144.36, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "ROCm", @@ -4544,8 +3852,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4572,8 +3880,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4583,10 +3891,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 11.69, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 26.46, "tps_std": 0.0, "error": true, "error_type": "hang", @@ -4597,7 +3905,7 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -4611,8 +3919,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 105.28, - "tps_std": 0.1, + "tps_mean": 145.01, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4625,8 +3933,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4653,8 +3961,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4664,23 +3972,54 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 70.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 56.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -4692,8 +4031,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 104.32, - "tps_std": 0.29, + "tps_mean": 146.28, + "tps_std": 0.12, "error": false, "error_type": null, "backend": "ROCm", @@ -4706,8 +4045,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4734,8 +4073,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4745,10 +4084,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.35, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 56.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -4759,11 +4098,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -4773,10 +4112,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.4, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.6, "tps_std": 0.0, "error": false, "error_type": null, @@ -4787,25 +4126,25 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 102.8, - "tps_std": 0.11, + "tps_mean": 146.01, + "tps_std": 0.05, "error": false, "error_type": null, "backend": "ROCm", @@ -4815,18 +4154,18 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -4843,27 +4182,27 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 15.25, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 35.23, "tps_std": 0.0, - "error": false, - "error_type": null, + "error": true, + "error_type": "runtime", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -4871,53 +4210,22 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } + "build": null }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.07, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 102.92, - "tps_std": 0.14, + "tps_mean": 143.94, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -4927,18 +4235,18 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", @@ -4955,24 +4263,24 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 15.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 34.82, "tps_std": 0.0, "error": false, "error_type": null, @@ -4983,24 +4291,24 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.07, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -5011,25 +4319,25 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 103.28, - "tps_std": 0.1, + "tps_mean": 147.07, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -5039,18 +4347,18 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", @@ -5067,81 +4375,50 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 34.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } + "build": null }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 104.3, - "tps_std": 0.24, + "tps_mean": 145.12, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -5151,18 +4428,18 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -5179,24 +4456,24 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 35.54, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 52.68, "tps_std": 0.0, "error": false, "error_type": null, @@ -5207,24 +4484,24 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.5, "tps_std": 0.0, "error": false, "error_type": null, @@ -5235,11 +4512,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5252,7 +4529,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 103.21, + "tps_mean": 145.84, "tps_std": 0.08, "error": false, "error_type": null, @@ -5266,8 +4543,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5294,8 +4571,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5305,13 +4582,13 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 15.05, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 35.12, "tps_std": 0.0, - "error": true, - "error_type": "hang", + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -5319,9 +4596,40 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", @@ -5333,8 +4641,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 96.88, - "tps_std": 0.1, + "tps_mean": 143.47, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -5347,8 +4655,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5375,8 +4683,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5386,10 +4694,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 15.08, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 35.06, "tps_std": 0.0, "error": false, "error_type": null, @@ -5400,11 +4708,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5414,10 +4722,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 1.07, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 1.82, "tps_std": 0.0, "error": false, "error_type": null, @@ -5428,11 +4736,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5445,8 +4753,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 104.95, - "tps_std": 0.14, + "tps_mean": 145.96, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", @@ -5459,8 +4767,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5473,7 +4781,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 2.79, + "tps_mean": 2.78, "tps_std": 0.0, "error": false, "error_type": null, @@ -5487,8 +4795,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5498,120 +4806,8 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 32.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 97.99, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.79, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 70.55, - "file_size_gib": 75.65, - "name_params_b": 70.55, - "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -5624,10 +4820,122 @@ "file_size_gib": null, "name_params_b": 70.0, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": null }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 145.24, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 52.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -5638,8 +4946,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 98.55, - "tps_std": 0.11, + "tps_mean": 100.73, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "Vulkan", @@ -5652,8 +4960,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5680,8 +4988,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5691,10 +4999,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 18.09, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 30.75, "tps_std": 0.0, "error": false, "error_type": null, @@ -5705,11 +5013,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5719,10 +5027,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.12, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.46, "tps_std": 0.0, "error": false, "error_type": null, @@ -5733,11 +5041,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5750,8 +5058,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 86.56, - "tps_std": 0.57, + "tps_mean": 88.29, + "tps_std": 0.76, "error": false, "error_type": null, "backend": "Vulkan", @@ -5764,8 +5072,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5792,8 +5100,8 @@ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5803,10 +5111,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.4, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 31.75, "tps_std": 0.0, "error": false, "error_type": null, @@ -5817,11 +5125,11 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -5831,10 +5139,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.36, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 2.56, "tps_std": 0.0, "error": false, "error_type": null, @@ -5845,178 +5153,16 @@ "file_size_gib": 75.65, "name_params_b": 70.55, "quant": "Q8_K_XL", - "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 263.94, - "tps_std": 2.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 155.11, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 152.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma", "env_base": "rocm", "env_variant": "7alpha-rocwmma", @@ -6024,27 +5170,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 273.53, - "tps_std": 2.86, + "tps_mean": 788.05, + "tps_std": 2.61, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma", "env_base": "rocm", "env_variant": "7alpha-rocwmma", @@ -6052,83 +5198,83 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.17, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.57, + "tps_mean": 8.48, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma", "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.85, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 139.54, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 5.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", @@ -6136,27 +5282,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 274.07, - "tps_std": 3.25, + "tps_mean": 787.49, + "tps_std": 3.69, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", @@ -6164,52 +5310,83 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.13, + "tps_mean": 8.48, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 136.23, "tps_std": 0.0, - "error": true, - "error_type": "runtime", + "error": false, + "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, - "build": null + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 5.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", "env": "rocm-7alpha", "env_base": "rocm", "env_variant": "7alpha", @@ -6217,5839 +5394,3802 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 273.71, - "tps_std": 1.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 188.24, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 289.25, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 102.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.12, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 99.11, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 290.6, - "tps_std": 0.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.38, - "tps_std": 1.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 293.23, - "tps_std": 0.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 128.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 287.19, - "tps_std": 1.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.25, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.62, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 294.05, - "tps_std": 2.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.17, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.59, - "tps_std": 1.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 126.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 287.84, - "tps_std": 2.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 127.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.02, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 207.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 282.7, - "tps_std": 0.57, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 202.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 224.36, - "tps_std": 2.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 84.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 211.78, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.73, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 85.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 82.35, - "name_params_b": 107.77, - "quant": "Q6_K", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 272.42, - "tps_std": 2.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 149.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 271.67, - "tps_std": 1.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.13, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 153.04, - "tps_std": 0.0, - "error": true, - "error_type": "runtime", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 273.57, - "tps_std": 2.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 108.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 269.91, - "tps_std": 0.99, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.11, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 107.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 274.27, - "tps_std": 3.87, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 190.45, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 296.39, - "tps_std": 0.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 100.8, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 295.81, - "tps_std": 2.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 99.06, - "tps_std": 0.0, - "error": true, - "error_type": "runtime", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 295.53, - "tps_std": 3.47, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 208.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 298.3, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 295.26, - "tps_std": 1.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 124.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.62, - "tps_std": 1.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 124.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.68, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 296.33, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 296.32, - "tps_std": 1.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 202.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 291.43, - "tps_std": 1.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 127.05, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 287.94, - "tps_std": 1.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 127.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 268.04, - "tps_std": 46.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 287.0, - "tps_std": 2.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 202.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 349.58, - "tps_std": 2.09, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 99.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 280.28, - "tps_std": 1.95, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 12.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 106.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 106.65, - "name_params_b": 107.77, - "quant": "Q8_0", - "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 313.68, - "tps_std": 2.67, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 154.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 312.46, - "tps_std": 3.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 311.89, - "tps_std": 2.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 313.81, - "tps_std": 0.68, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 314.61, - "tps_std": 2.38, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 315.62, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 19.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 308.87, - "tps_std": 1.54, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 101.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 306.69, - "tps_std": 2.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 100.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 308.5, - "tps_std": 4.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 209.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 308.24, - "tps_std": 0.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 217.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 304.34, - "tps_std": 2.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.61, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 138.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 305.86, - "tps_std": 2.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.55, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 134.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 306.39, - "tps_std": 1.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 198.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 309.0, + "tps_mean": 799.32, "tps_std": 2.3, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.81, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 305.09, - "tps_std": 1.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 136.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 292.67, - "tps_std": 0.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.6, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 302.22, - "tps_std": 1.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 196.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 300.96, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 18.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "hang", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": 17.0, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 195.52, - "tps_std": 1.35, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 20.65, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 81.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 13.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 230.79, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 20.79, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 89.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 107.77, - "file_size_gib": 57.73, - "name_params_b": 107.77, - "quant": "Q4_K_XL", - "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 273.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 141.46, + "tps_mean": 804.49, + "tps_std": 4.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 282.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 792.57, + "tps_std": 2.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 121.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 794.96, + "tps_std": 3.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 118.46, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 802.78, + "tps_std": 0.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 289.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 810.15, + "tps_std": 2.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 316.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 781.49, + "tps_std": 1.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 176.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 795.45, + "tps_std": 1.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 179.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 797.09, + "tps_std": 3.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 261.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 800.44, + "tps_std": 2.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 282.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 794.13, + "tps_std": 2.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 177.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 757.71, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 175.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 803.93, + "tps_std": 1.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 266.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 767.62, + "tps_std": 3.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 280.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 7.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 228.96, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 101.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 198.3, "tps_std": 1.06, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.01, + "tps_mean": 7.57, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 48.54, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 98.61, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.81, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 6.84, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 140.4, - "tps_std": 0.48, + "tps_mean": 575.9, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.93, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.58, + "tps_mean": 41.67, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.43, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 815.96, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 37.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 566.25, + "tps_std": 5.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.75, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 818.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 37.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 573.96, + "tps_std": 1.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 840.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 568.52, + "tps_std": 6.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.75, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 842.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1111.52, + "tps_std": 3.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1161.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1075.82, + "tps_std": 2.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.19, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1159.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1111.9, + "tps_std": 4.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.13, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1083.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1074.4, + "tps_std": 7.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.17, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1126.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1110.04, + "tps_std": 2.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.43, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1034.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1073.92, + "tps_std": 6.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1041.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1119.24, + "tps_std": 8.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 964.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1086.57, + "tps_std": 5.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.44, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 972.22, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1100.11, + "tps_std": 6.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1036.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1066.41, + "tps_std": 6.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1030.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 38.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1110.36, + "tps_std": 6.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.23, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1042.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1074.73, + "tps_std": 4.78, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1001.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.21, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1224.54, + "tps_std": 7.71, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.56, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 569.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 940.69, + "tps_std": 5.6, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.38, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 489.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12062,8 +9202,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 143.18, - "tps_std": 0.54, + "tps_mean": 195.33, + "tps_std": 3.19, "error": false, "error_type": null, "backend": "ROCm", @@ -12076,8 +9216,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12090,8 +9230,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.08, - "tps_std": 0.0, + "tps_mean": 16.05, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -12104,8 +9244,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12115,91 +9255,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 28.32, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 142.52, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.13, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 29.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 73.22, "tps_std": 0.0, "error": true, "error_type": "runtime", @@ -12210,7 +9269,88 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 195.28, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 72.86, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -12224,8 +9364,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 144.03, - "tps_std": 1.12, + "tps_mean": 194.82, + "tps_std": 1.18, "error": false, "error_type": null, "backend": "ROCm", @@ -12238,8 +9378,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12252,7 +9392,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.04, + "tps_mean": 16.06, "tps_std": 0.0, "error": false, "error_type": null, @@ -12266,8 +9406,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12277,10 +9417,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.64, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 63.02, "tps_std": 0.0, "error": false, "error_type": null, @@ -12291,11 +9431,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12305,10 +9445,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.73, "tps_std": 0.0, "error": false, "error_type": null, @@ -12319,11 +9459,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12336,8 +9476,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 140.69, - "tps_std": 0.99, + "tps_mean": 196.8, + "tps_std": 1.26, "error": false, "error_type": null, "backend": "ROCm", @@ -12350,8 +9490,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12364,8 +9504,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.07, - "tps_std": 0.05, + "tps_mean": 16.01, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -12378,8 +9518,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12389,10 +9529,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 38.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 65.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -12403,11 +9543,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12417,10 +9557,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.2, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.83, "tps_std": 0.0, "error": false, "error_type": null, @@ -12431,11 +9571,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12448,8 +9588,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 143.65, - "tps_std": 1.06, + "tps_mean": 198.21, + "tps_std": 1.42, "error": false, "error_type": null, "backend": "ROCm", @@ -12462,8 +9602,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12476,8 +9616,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.07, - "tps_std": 0.0, + "tps_mean": 15.09, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -12490,8 +9630,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12501,10 +9641,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 24.32, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 66.78, "tps_std": 0.0, "error": true, "error_type": "hang", @@ -12515,7 +9655,7 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -12529,8 +9669,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 142.82, - "tps_std": 1.43, + "tps_mean": 202.49, + "tps_std": 1.92, "error": false, "error_type": null, "backend": "ROCm", @@ -12543,8 +9683,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12557,7 +9697,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 15.11, + "tps_mean": 15.09, "tps_std": 0.01, "error": false, "error_type": null, @@ -12571,8 +9711,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12582,13 +9722,13 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 24.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 69.68, "tps_std": 0.0, "error": true, - "error_type": "hang", + "error_type": "runtime", "backend": "ROCm", "ngl": 99, "mmap": 0, @@ -12596,7 +9736,7 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -12610,8 +9750,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 146.45, - "tps_std": 0.58, + "tps_mean": 205.39, + "tps_std": 1.95, "error": false, "error_type": null, "backend": "ROCm", @@ -12624,8 +9764,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12638,8 +9778,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.88, - "tps_std": 0.01, + "tps_mean": 14.83, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12652,8 +9792,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -12663,484 +9803,36 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 47.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 145.3, - "tps_std": 1.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 48.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 144.51, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 27.99, - "tps_std": 0.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, "error": true, "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": null }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 144.56, - "tps_std": 0.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 27.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.99, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 146.23, - "tps_std": 0.15, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 39.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 146.0, - "tps_std": 0.62, + "tps_mean": 201.21, + "tps_std": 1.57, "error": false, "error_type": null, "backend": "ROCm", @@ -13150,18 +9842,18 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, "context": "default", @@ -13178,439 +9870,22 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 40.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 144.65, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 27.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 142.7, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 28.48, - "tps_std": 0.0, - "error": true, - "error_type": "hang", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 145.17, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 39.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 143.77, - "tps_std": 0.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 15.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 235.09, - "file_size_gib": 96.99, - "name_params_b": 235.09, - "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", - "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -13623,7 +9898,686 @@ "file_size_gib": null, "name_params_b": 235.0, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 197.07, + "tps_std": 0.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 65.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 5.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 195.14, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.08, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 64.99, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 199.48, + "tps_std": 2.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.91, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 197.33, + "tps_std": 0.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 62.78, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 199.34, + "tps_std": 1.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 15.09, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 62.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 5.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 201.37, + "tps_std": 1.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 199.51, + "tps_std": 1.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.98, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 235.0, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -13637,8 +10591,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 139.19, - "tps_std": 0.25, + "tps_mean": 145.16, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "Vulkan", @@ -13651,8 +10605,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13665,8 +10619,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 17.45, - "tps_std": 0.01, + "tps_mean": 17.77, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -13679,8 +10633,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13690,10 +10644,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.94, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 31.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -13704,11 +10658,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13718,10 +10672,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.42, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 5.72, "tps_std": 0.0, "error": false, "error_type": null, @@ -13732,11 +10686,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13749,8 +10703,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 128.55, - "tps_std": 1.17, + "tps_mean": 131.53, + "tps_std": 1.13, "error": false, "error_type": null, "backend": "Vulkan", @@ -13763,8 +10717,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13777,8 +10731,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 18.47, - "tps_std": 0.01, + "tps_mean": 18.08, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -13791,8 +10745,8 @@ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13802,10 +10756,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 22.82, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 34.22, "tps_std": 0.0, "error": false, "error_type": null, @@ -13816,11 +10770,11 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -13830,10 +10784,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.88, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 9.66, "tps_std": 0.0, "error": false, "error_type": null, @@ -13844,25 +10798,25 @@ "file_size_gib": 96.99, "name_params_b": 235.09, "quant": "Q3_K_XL", - "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 478.1, - "tps_std": 4.01, + "tps_mean": 489.1, + "tps_std": 3.76, "error": false, "error_type": null, "backend": "ROCm", @@ -13872,19 +10826,467 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 282.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 491.47, + "tps_std": 1.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 271.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 14.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 491.95, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.1, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 336.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 492.47, + "tps_std": 3.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.15, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 318.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 567.78, + "tps_std": 2.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, @@ -13900,624 +11302,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 197.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 352.23, - "tps_std": 9.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 192.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 483.01, - "tps_std": 4.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 147.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 345.22, - "tps_std": 23.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.84, - "tps_std": 0.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 135.26, - "tps_std": 0.0, - "error": true, - "error_type": "runtime", - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 482.27, - "tps_std": 5.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 164.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 360.93, - "tps_std": 3.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 197.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 479.75, - "tps_std": 5.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -14527,6 +11316,90 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 268.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 18.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 575.99, + "tps_std": 6.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, "context": "default", "context_tokens": null, "test": "tg128", @@ -14541,67 +11414,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 107.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -14611,11 +11428,67 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 255.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 18.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 360.25, - "tps_std": 7.4, + "tps_mean": 576.13, + "tps_std": 3.25, "error": false, "error_type": null, "backend": "ROCm", @@ -14625,24 +11498,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", + "env": "rocm6_4_4", "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 26.84, + "tps_mean": 26.13, "tps_std": 0.01, "error": false, "error_type": null, @@ -14653,95 +11526,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 107.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 493.29, - "tps_std": 1.77, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -14751,82 +11540,23 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, + "context": "longctx16384", + "context_tokens": 16384, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 30.0, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 208.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } + "build": null }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", @@ -14837,24 +11567,21 @@ "fa": true, "context": "default", "context_tokens": null, - "test": "pp512", - "tps_mean": 362.53, - "tps_std": 2.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "hang", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 30.0, "quant": "BF16", "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } + "build": null }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", @@ -14863,10 +11590,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.8, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 377.73, "tps_std": 0.0, "error": false, "error_type": null, @@ -14877,11 +11604,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -14891,10 +11618,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 244.87, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.29, "tps_std": 0.0, "error": false, "error_type": null, @@ -14905,53 +11632,25 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 484.23, - "tps_std": 1.92, + "tps_mean": 460.49, + "tps_std": 1.91, "error": false, "error_type": null, "backend": "ROCm", @@ -14961,248 +11660,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", "tps_mean": 27.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 357.45, - "tps_std": 1.2, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 127.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 485.6, - "tps_std": 4.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 56.89, - "name_params_b": 30.53, - "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.98, "tps_std": 0.01, "error": false, "error_type": null, @@ -15213,24 +11688,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.51, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 234.4, "tps_std": 0.0, "error": false, "error_type": null, @@ -15241,24 +11716,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.95, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 17.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -15269,25 +11744,249 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 467.86, + "tps_std": 1.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 224.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 18.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 468.47, + "tps_std": 2.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 360.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.31, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 360.51, - "tps_std": 0.53, + "tps_mean": 470.04, + "tps_std": 3.69, "error": false, "error_type": null, "backend": "ROCm", @@ -15297,24 +11996,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 26.98, + "tps_mean": 27.02, "tps_std": 0.0, "error": false, "error_type": null, @@ -15325,24 +12024,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.18, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 337.92, "tps_std": 0.0, "error": false, "error_type": null, @@ -15353,24 +12052,24 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.94, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.32, "tps_std": 0.0, "error": false, "error_type": null, @@ -15381,11 +12080,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15398,8 +12097,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 472.32, - "tps_std": 1.65, + "tps_mean": 461.77, + "tps_std": 2.14, "error": false, "error_type": null, "backend": "ROCm", @@ -15412,8 +12111,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15426,8 +12125,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.13, - "tps_std": 0.01, + "tps_mean": 27.1, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -15440,8 +12139,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15451,10 +12150,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 129.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 234.95, "tps_std": 0.0, "error": false, "error_type": null, @@ -15465,11 +12164,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15479,10 +12178,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 18.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -15493,11 +12192,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15510,8 +12209,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 348.39, - "tps_std": 4.07, + "tps_mean": 463.39, + "tps_std": 4.6, "error": false, "error_type": null, "backend": "ROCm", @@ -15524,8 +12223,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15538,7 +12237,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.16, + "tps_mean": 27.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -15552,8 +12251,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15563,10 +12262,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 125.66, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 223.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -15577,11 +12276,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15591,10 +12290,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 12.6, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 18.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -15605,11 +12304,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15622,8 +12321,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 487.44, - "tps_std": 3.08, + "tps_mean": 466.61, + "tps_std": 1.68, "error": false, "error_type": null, "backend": "ROCm", @@ -15636,8 +12335,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15650,7 +12349,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 26.97, + "tps_mean": 27.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -15664,8 +12363,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15675,10 +12374,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 194.86, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 319.2, "tps_std": 0.0, "error": false, "error_type": null, @@ -15689,11 +12388,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15703,10 +12402,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.04, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -15717,11 +12416,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15734,8 +12433,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 350.49, - "tps_std": 4.28, + "tps_mean": 473.68, + "tps_std": 1.69, "error": false, "error_type": null, "backend": "ROCm", @@ -15748,8 +12447,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15762,7 +12461,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.05, + "tps_mean": 27.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -15776,8 +12475,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15787,10 +12486,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.05, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 337.79, "tps_std": 0.0, "error": false, "error_type": null, @@ -15801,11 +12500,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15815,10 +12514,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 18.97, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.34, "tps_std": 0.0, "error": false, "error_type": null, @@ -15829,11 +12528,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15846,8 +12545,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 191.31, - "tps_std": 0.05, + "tps_mean": 194.36, + "tps_std": 0.12, "error": false, "error_type": null, "backend": "Vulkan", @@ -15860,8 +12559,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15875,7 +12574,7 @@ "context_tokens": null, "test": "tg128", "tps_mean": 9.96, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "Vulkan", @@ -15888,8 +12587,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15899,10 +12598,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 53.97, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 86.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -15913,11 +12612,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15927,10 +12626,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 8.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -15941,11 +12640,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15958,8 +12657,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 165.85, - "tps_std": 0.14, + "tps_mean": 167.29, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "Vulkan", @@ -15972,8 +12671,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -15986,7 +12685,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 9.35, + "tps_mean": 9.36, "tps_std": 0.01, "error": false, "error_type": null, @@ -16000,8 +12699,8 @@ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16011,10 +12710,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 73.78, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 102.9, "tps_std": 0.0, "error": false, "error_type": null, @@ -16025,11 +12724,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16039,10 +12738,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 7.95, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 8.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -16053,235 +12752,11 @@ "file_size_gib": 56.89, "name_params_b": 30.53, "quant": "BF16", - "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 573.35, - "tps_std": 5.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 579.57, - "tps_std": 12.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 202.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16294,8 +12769,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 568.92, - "tps_std": 3.37, + "tps_mean": 883.95, + "tps_std": 6.89, "error": false, "error_type": null, "backend": "ROCm", @@ -16308,8 +12783,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16322,7 +12797,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.4, + "tps_mean": 58.73, "tps_std": 0.01, "error": false, "error_type": null, @@ -16336,8 +12811,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16347,10 +12822,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 147.26, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 323.55, "tps_std": 0.0, "error": false, "error_type": null, @@ -16361,11 +12836,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16375,10 +12850,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.73, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 20.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -16389,11 +12864,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16406,8 +12881,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 575.31, - "tps_std": 5.34, + "tps_mean": 888.32, + "tps_std": 6.59, "error": false, "error_type": null, "backend": "ROCm", @@ -16420,8 +12895,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16434,8 +12909,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.66, - "tps_std": 0.01, + "tps_mean": 58.55, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -16448,8 +12923,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16459,10 +12934,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 145.86, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 323.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -16473,11 +12948,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16487,10 +12962,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.72, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 20.51, "tps_std": 0.0, "error": false, "error_type": null, @@ -16501,11 +12976,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16518,8 +12993,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 574.31, - "tps_std": 5.95, + "tps_mean": 887.98, + "tps_std": 2.33, "error": false, "error_type": null, "backend": "ROCm", @@ -16532,8 +13007,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16546,7 +13021,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.21, + "tps_mean": 58.53, "tps_std": 0.0, "error": false, "error_type": null, @@ -16560,8 +13035,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16571,10 +13046,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 160.06, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 273.12, "tps_std": 0.0, "error": false, "error_type": null, @@ -16585,11 +13060,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16599,10 +13074,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.7, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.25, "tps_std": 0.0, "error": false, "error_type": null, @@ -16613,11 +13088,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16630,8 +13105,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 576.33, - "tps_std": 7.18, + "tps_mean": 897.81, + "tps_std": 13.27, "error": false, "error_type": null, "backend": "ROCm", @@ -16644,8 +13119,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16658,7 +13133,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.48, + "tps_mean": 58.56, "tps_std": 0.01, "error": false, "error_type": null, @@ -16672,8 +13147,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16683,10 +13158,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 160.69, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 273.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -16697,11 +13172,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16711,10 +13186,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.79, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 40.23, "tps_std": 0.0, "error": false, "error_type": null, @@ -16725,11 +13200,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16742,8 +13217,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 622.81, - "tps_std": 3.95, + "tps_mean": 1045.84, + "tps_std": 8.87, "error": false, "error_type": null, "backend": "ROCm", @@ -16756,8 +13231,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16770,7 +13245,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.81, + "tps_mean": 58.04, "tps_std": 0.01, "error": false, "error_type": null, @@ -16784,8 +13259,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16795,10 +13270,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.56, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 310.27, "tps_std": 0.0, "error": false, "error_type": null, @@ -16809,11 +13284,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16823,10 +13298,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.89, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -16837,11 +13312,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16854,8 +13329,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 625.44, - "tps_std": 4.55, + "tps_mean": 1046.62, + "tps_std": 8.31, "error": false, "error_type": null, "backend": "ROCm", @@ -16868,8 +13343,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16882,8 +13357,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.89, - "tps_std": 0.0, + "tps_mean": 57.78, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -16896,8 +13371,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16907,10 +13382,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 108.66, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 313.66, "tps_std": 0.0, "error": false, "error_type": null, @@ -16921,11 +13396,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16935,10 +13410,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.92, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -16949,11 +13424,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16966,8 +13441,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 640.29, - "tps_std": 6.85, + "tps_mean": 1065.87, + "tps_std": 15.74, "error": false, "error_type": null, "backend": "ROCm", @@ -16980,8 +13455,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -16994,7 +13469,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 56.58, + "tps_mean": 57.71, "tps_std": 0.02, "error": false, "error_type": null, @@ -17008,8 +13483,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17019,10 +13494,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 203.61, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 353.38, "tps_std": 0.0, "error": false, "error_type": null, @@ -17033,11 +13508,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17047,10 +13522,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.56, "tps_std": 0.0, "error": false, "error_type": null, @@ -17061,11 +13536,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17078,8 +13553,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 632.09, - "tps_std": 4.14, + "tps_mean": 1056.16, + "tps_std": 8.88, "error": false, "error_type": null, "backend": "ROCm", @@ -17092,8 +13567,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17106,8 +13581,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.49, - "tps_std": 0.0, + "tps_mean": 57.68, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -17120,8 +13595,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17131,10 +13606,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.05, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 341.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -17145,11 +13620,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17159,10 +13634,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.58, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -17173,25 +13648,25 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 620.61, - "tps_std": 2.27, + "tps_mean": 961.79, + "tps_std": 10.6, "error": false, "error_type": null, "backend": "ROCm", @@ -17201,18 +13676,466 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.69, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 263.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 964.88, + "tps_std": 9.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 263.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 992.39, + "tps_std": 4.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.48, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 286.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 984.99, + "tps_std": 7.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 57.39, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 284.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 966.17, + "tps_std": 7.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -17229,431 +14152,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.7, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 617.37, - "tps_std": 6.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.82, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 628.16, - "tps_std": 1.9, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 167.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 628.24, - "tps_std": 3.85, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 57.33, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 169.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 625.38, - "tps_std": 1.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17663,38 +14166,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 58.11, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 24.53, - "name_params_b": 30.53, - "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.65, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 263.45, "tps_std": 0.0, "error": false, "error_type": null, @@ -17705,11 +14180,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17719,10 +14194,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.88, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.74, "tps_std": 0.0, "error": false, "error_type": null, @@ -17733,11 +14208,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17750,8 +14225,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 616.46, - "tps_std": 1.4, + "tps_mean": 960.5, + "tps_std": 8.25, "error": false, "error_type": null, "backend": "ROCm", @@ -17764,8 +14239,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17778,8 +14253,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.69, - "tps_std": 0.01, + "tps_mean": 57.91, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -17792,8 +14267,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17803,10 +14278,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.89, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 263.73, "tps_std": 0.0, "error": false, "error_type": null, @@ -17817,11 +14292,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17831,10 +14306,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.87, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.72, "tps_std": 0.0, "error": false, "error_type": null, @@ -17845,11 +14320,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17862,8 +14337,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 625.22, - "tps_std": 5.42, + "tps_mean": 986.79, + "tps_std": 6.92, "error": false, "error_type": null, "backend": "ROCm", @@ -17876,8 +14351,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17890,8 +14365,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.35, - "tps_std": 0.0, + "tps_mean": 57.42, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -17904,8 +14379,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17915,10 +14390,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 168.61, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 410.85, "tps_std": 0.0, "error": false, "error_type": null, @@ -17929,11 +14404,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17943,10 +14418,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.27, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.33, "tps_std": 0.0, "error": false, "error_type": null, @@ -17957,11 +14432,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -17974,8 +14449,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 626.37, - "tps_std": 6.13, + "tps_mean": 984.07, + "tps_std": 5.87, "error": false, "error_type": null, "backend": "ROCm", @@ -17988,8 +14463,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18002,8 +14477,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 57.38, - "tps_std": 0.0, + "tps_mean": 57.52, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -18016,8 +14491,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18027,10 +14502,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.36, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 282.74, "tps_std": 0.0, "error": false, "error_type": null, @@ -18041,11 +14516,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18055,10 +14530,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.25, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.35, "tps_std": 0.0, "error": false, "error_type": null, @@ -18069,11 +14544,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18086,8 +14561,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1013.46, - "tps_std": 4.96, + "tps_mean": 1129.76, + "tps_std": 4.79, "error": false, "error_type": null, "backend": "Vulkan", @@ -18100,8 +14575,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18114,7 +14589,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 62.1, + "tps_mean": 62.27, "tps_std": 0.01, "error": false, "error_type": null, @@ -18128,8 +14603,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18139,10 +14614,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 69.7, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 135.73, "tps_std": 0.0, "error": false, "error_type": null, @@ -18153,11 +14628,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18167,10 +14642,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 20.21, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 33.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -18181,11 +14656,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18198,8 +14673,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 853.23, - "tps_std": 3.21, + "tps_mean": 905.18, + "tps_std": 4.26, "error": false, "error_type": null, "backend": "Vulkan", @@ -18212,8 +14687,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18226,7 +14701,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 66.93, + "tps_mean": 66.46, "tps_std": 0.05, "error": false, "error_type": null, @@ -18240,8 +14715,8 @@ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18251,10 +14726,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 104.06, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 172.42, "tps_std": 0.0, "error": false, "error_type": null, @@ -18265,11 +14740,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18279,10 +14754,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 29.97, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -18293,235 +14768,11 @@ "file_size_gib": 24.53, "name_params_b": 30.53, "quant": "Q6_K_XL", - "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 673.5, - "tps_std": 8.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 70.76, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 669.29, - "tps_std": 4.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 71.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18534,8 +14785,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 674.15, - "tps_std": 10.24, + "tps_mean": 1189.07, + "tps_std": 7.02, "error": false, "error_type": null, "backend": "ROCm", @@ -18548,8 +14799,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18562,8 +14813,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.14, - "tps_std": 0.01, + "tps_mean": 71.58, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -18576,8 +14827,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18587,10 +14838,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 150.94, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 332.99, "tps_std": 0.0, "error": false, "error_type": null, @@ -18601,11 +14852,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18615,10 +14866,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.92, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 21.83, "tps_std": 0.0, "error": false, "error_type": null, @@ -18629,11 +14880,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18646,8 +14897,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 666.63, - "tps_std": 5.54, + "tps_mean": 1185.59, + "tps_std": 10.6, "error": false, "error_type": null, "backend": "ROCm", @@ -18660,8 +14911,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18674,7 +14925,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.62, + "tps_mean": 71.65, "tps_std": 0.02, "error": false, "error_type": null, @@ -18688,8 +14939,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18699,10 +14950,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 148.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 338.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -18713,11 +14964,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18727,10 +14978,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.94, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 21.84, "tps_std": 0.0, "error": false, "error_type": null, @@ -18741,11 +14992,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18758,8 +15009,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 675.1, - "tps_std": 3.41, + "tps_mean": 1185.88, + "tps_std": 9.66, "error": false, "error_type": null, "backend": "ROCm", @@ -18772,8 +15023,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18786,7 +15037,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.06, + "tps_mean": 71.64, "tps_std": 0.01, "error": false, "error_type": null, @@ -18800,8 +15051,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18811,10 +15062,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 161.39, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 295.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -18825,11 +15076,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18839,10 +15090,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.57, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 45.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -18853,11 +15104,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18870,8 +15121,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 676.38, - "tps_std": 1.86, + "tps_mean": 1189.9, + "tps_std": 9.82, "error": false, "error_type": null, "backend": "ROCm", @@ -18884,8 +15135,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18898,8 +15149,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 71.44, - "tps_std": 0.02, + "tps_mean": 71.61, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -18912,8 +15163,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18923,10 +15174,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 160.7, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 291.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -18937,11 +15188,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18951,10 +15202,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.64, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 45.74, "tps_std": 0.0, "error": false, "error_type": null, @@ -18965,11 +15216,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -18982,8 +15233,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 663.26, - "tps_std": 2.04, + "tps_mean": 1201.14, + "tps_std": 12.83, "error": false, "error_type": null, "backend": "ROCm", @@ -18996,8 +15247,8 @@ "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -19010,1127 +15261,343 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 68.79, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 108.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 655.75, - "tps_std": 5.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.7, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 109.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 674.37, - "tps_std": 11.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 67.62, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 665.28, - "tps_std": 7.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.57, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 649.91, - "tps_std": 5.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.03, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 658.06, - "tps_std": 8.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.11, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 662.81, - "tps_std": 8.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.77, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 167.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 660.13, - "tps_std": 8.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.73, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 169.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 650.55, - "tps_std": 3.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 647.31, - "tps_std": 2.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 69.01, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 660.75, - "tps_std": 2.92, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 68.92, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 319.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 30.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1208.02, + "tps_std": 13.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.87, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 321.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 30.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1225.68, + "tps_std": 19.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.13, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 387.32, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.53, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1231.06, + "tps_std": 2.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.08, "tps_std": 0.01, "error": false, "error_type": null, @@ -20141,11 +15608,767 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 361.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 44.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1172.29, + "tps_std": 9.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.19, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 29.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1169.43, + "tps_std": 4.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.31, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.83, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 30.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1183.05, + "tps_std": 9.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.17, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 292.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1195.38, + "tps_std": 5.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 287.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1166.57, + "tps_std": 8.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.37, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 30.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1167.1, + "tps_std": 5.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 69.33, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 30.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1194.63, + "tps_std": 7.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -20155,3426 +16378,878 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 662.51, - "tps_std": 3.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 68.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 774.61, - "tps_std": 2.12, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 81.31, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 68.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 832.44, - "tps_std": 3.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 87.24, - "tps_std": 0.18, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 100.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 33.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 17.35, - "name_params_b": 30.53, - "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 602.73, - "tps_std": 3.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.21, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 201.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 587.21, - "tps_std": 4.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 200.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 601.39, - "tps_std": 7.96, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.54, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 145.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 585.7, - "tps_std": 2.25, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 148.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 601.34, - "tps_std": 1.6, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 160.98, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 585.58, - "tps_std": 4.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.38, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 163.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 652.89, - "tps_std": 1.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 110.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.52, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 638.38, - "tps_std": 7.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.12, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 108.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "caca0d55c", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 648.39, - "tps_std": 23.62, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.52, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 218.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 640.53, - "tps_std": 6.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.87, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 207.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "86f1f4411", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 650.26, - "tps_std": 1.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.8, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 132.22, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 634.84, - "tps_std": 9.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.78, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.93, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "f1840a25d", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 649.99, - "tps_std": 3.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.58, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 640.61, - "tps_std": 7.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.69, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 171.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "677be4d78", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 648.21, - "tps_std": 4.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.85, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.54, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 631.07, - "tps_std": 4.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.89, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 131.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "b447a9a4b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 654.79, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.22, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 237.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 633.61, - "tps_std": 5.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 44.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 221.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "fa5c85a8b", - "number": "7085" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1152.51, - "tps_std": 1.98, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "ab5783eb4", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.58, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "ab5783eb4", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 71.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ab5783eb4", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ab5783eb4", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 916.61, - "tps_std": 3.21, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "0a3857fe0", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 45.81, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "0a3857fe0", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 108.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "0a3857fe0", - "number": "7089" - } - }, - { - "model": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "model_clean": "Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 30.53, - "file_size_gib": 33.51, - "name_params_b": 30.53, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "0a3857fe0", - "number": "7089" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 248.51, - "tps_std": 1.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.33, - "tps_std": 0.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 302.76, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.42, - "tps_std": 0.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 242.26, - "tps_std": 2.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.98, - "tps_std": 0.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 287.99, - "tps_std": 2.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 26.05, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 318.5, - "tps_std": 0.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.75, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 325.56, - "tps_std": 2.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.95, - "tps_std": 0.39, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 320.64, - "tps_std": 0.8, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.55, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 325.06, - "tps_std": 1.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.14, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 264.35, - "tps_std": 7.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.89, - "tps_std": 0.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 321.93, - "tps_std": 4.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.64, - "tps_std": 1.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 256.63, - "tps_std": 1.91, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.18, + "tps_mean": 68.98, "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__fa1.log", + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 324.95, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.54, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 255.48, - "tps_std": 2.19, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.31, - "tps_std": 0.65, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 327.95, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.64, - "tps_std": 1.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 79.67, - "file_size_gib": 79.57, - "name_params_b": 79.67, - "quant": "Q8_K_XL", - "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "03d9a77b8", - "number": "7278" - } - }, - { - "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", - "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "env": "rocm7_rc", "env_base": "rocm7_rc", "env_variant": null, "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 292.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1202.35, + "tps_std": 10.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 68.96, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 292.54, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 44.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 264.12, + "tps_mean": 825.86, + "tps_std": 2.68, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 80.94, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 130.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 37.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 864.66, + "tps_std": 2.72, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 83.07, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 168.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 47.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 345.64, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.88, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 344.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 20.67, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 356.53, + "tps_std": 3.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.26, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 352.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 24.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 346.29, + "tps_std": 1.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.92, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 379.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 360.22, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.35, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 386.65, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 25.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 573.57, "tps_std": 2.61, "error": false, "error_type": null, @@ -23585,11 +17260,1069 @@ "file_size_gib": 79.57, "name_params_b": 79.67, "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 464.91, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 595.88, + "tps_std": 2.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.34, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 480.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 23.3, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 576.31, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 25.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 489.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 24.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 590.68, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.38, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 503.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 25.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 571.05, + "tps_std": 4.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.45, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 444.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 21.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 594.4, + "tps_std": 3.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.73, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 451.46, + "tps_std": 0.0, + "error": true, + "error_type": "runtime", + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 568.38, + "tps_std": 2.63, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.5, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 462.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 22.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 587.5, + "tps_std": 6.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 479.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 21.62, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 567.13, + "tps_std": 2.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.44, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 438.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 23.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 594.63, + "tps_std": 11.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 26.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 442.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 23.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 570.28, + "tps_std": 3.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23602,8 +18335,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 24.94, - "tps_std": 0.15, + "tps_mean": 26.05, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -23616,8 +18349,64 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 473.92, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 24.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23630,8 +18419,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 328.73, - "tps_std": 4.15, + "tps_mean": 588.8, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", @@ -23644,8 +18433,8 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23658,8 +18447,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 25.46, - "tps_std": 0.21, + "tps_mean": 26.79, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -23672,8 +18461,64 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 488.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 21.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23686,8 +18531,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 417.28, - "tps_std": 0.97, + "tps_mean": 406.08, + "tps_std": 1.14, "error": false, "error_type": null, "backend": "Vulkan", @@ -23700,8 +18545,8 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23714,8 +18559,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 30.31, - "tps_std": 0.01, + "tps_mean": 33.67, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -23728,8 +18573,64 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 194.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.27, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23742,8 +18643,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 333.74, - "tps_std": 1.19, + "tps_mean": 326.83, + "tps_std": 0.94, "error": false, "error_type": null, "backend": "Vulkan", @@ -23756,8 +18657,8 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -23770,8 +18671,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 27.76, - "tps_std": 0.01, + "tps_mean": 30.18, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -23784,232 +18685,64 @@ "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "03d9a77b8", - "number": "7278" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 751.05, - "tps_std": 61.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.21, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 220.99, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1.log", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 323.48, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 27.91, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "ROCm", + "backend": "Vulkan", "ngl": 99, "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, + "params_b": 79.67, + "file_size_gib": 79.57, + "name_params_b": 79.67, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 799.75, - "tps_std": 0.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 335.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24022,8 +18755,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 795.35, - "tps_std": 0.84, + "tps_mean": 421.94, + "tps_std": 0.25, "error": false, "error_type": null, "backend": "ROCm", @@ -24036,8 +18769,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24050,7 +18783,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.16, + "tps_mean": 14.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -24064,8 +18797,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24075,10 +18808,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 354.28, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 332.05, "tps_std": 0.0, "error": false, "error_type": null, @@ -24089,11 +18822,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24103,10 +18836,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.03, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -24117,11 +18850,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24134,8 +18867,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 804.75, - "tps_std": 0.44, + "tps_mean": 424.57, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "ROCm", @@ -24148,8 +18881,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24162,7 +18895,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.16, + "tps_mean": 14.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -24176,8 +18909,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24187,11 +18920,11 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 369.35, - "tps_std": 42.57, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 325.94, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -24201,11 +18934,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24215,11 +18948,11 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.04, - "tps_std": 0.01, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.85, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -24229,11 +18962,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24246,8 +18979,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 816.42, - "tps_std": 1.0, + "tps_mean": 429.89, + "tps_std": 0.35, "error": false, "error_type": null, "backend": "ROCm", @@ -24260,8 +18993,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24274,6 +19007,118 @@ "context": "default", "context_tokens": null, "test": "tg128", + "tps_mean": 14.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 309.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 430.69, + "tps_std": 0.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", "tps_mean": 14.25, "tps_std": 0.0, "error": false, @@ -24285,95 +19130,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 345.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 826.54, - "tps_std": 0.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24383,10 +19144,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.23, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 338.46, "tps_std": 0.0, "error": false, "error_type": null, @@ -24397,11 +19158,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24411,10 +19172,10 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 371.28, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -24425,39 +19186,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24470,8 +19203,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 811.49, - "tps_std": 0.16, + "tps_mean": 897.73, + "tps_std": 0.63, "error": false, "error_type": null, "backend": "ROCm", @@ -24484,8 +19217,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24512,8 +19245,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24523,10 +19256,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 151.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 288.29, "tps_std": 0.0, "error": false, "error_type": null, @@ -24537,11 +19270,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24551,10 +19284,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.18, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -24565,11 +19298,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24582,8 +19315,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 819.41, - "tps_std": 1.64, + "tps_mean": 904.28, + "tps_std": 1.51, "error": false, "error_type": null, "backend": "ROCm", @@ -24596,8 +19329,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24624,8 +19357,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24635,10 +19368,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 149.96, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 290.79, "tps_std": 0.0, "error": false, "error_type": null, @@ -24649,11 +19382,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24663,10 +19396,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.19, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -24677,11 +19410,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24694,8 +19427,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 826.24, - "tps_std": 1.79, + "tps_mean": 913.75, + "tps_std": 0.6, "error": false, "error_type": null, "backend": "ROCm", @@ -24708,8 +19441,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24722,118 +19455,6 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 267.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 833.1, - "tps_std": 1.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 14.21, "tps_std": 0.0, "error": false, @@ -24845,11 +19466,95 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 425.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.72, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 919.42, + "tps_std": 0.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24859,10 +19564,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 252.65, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.19, "tps_std": 0.0, "error": false, "error_type": null, @@ -24873,11 +19578,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -24887,10 +19592,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.56, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 430.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -24901,361 +19606,25 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 797.32, - "tps_std": 10.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 180.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 810.77, - "tps_std": 1.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.18, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 819.82, - "tps_std": 2.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 269.95, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 832.48, - "tps_std": 2.03, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.78, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -25265,19 +19634,271 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 886.59, + "tps_std": 0.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 303.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 890.97, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 304.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 910.34, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, @@ -25293,360 +19914,24 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 266.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 802.23, - "tps_std": 0.79, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 167.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 790.38, - "tps_std": 0.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 170.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 10.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 813.63, - "tps_std": 8.49, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 14.21, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 11.77, - "file_size_gib": 13.4, - "name_params_b": 11.77, - "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-12b-it-UD-Q8_K_XL", - "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 265.54, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 391.58, "tps_std": 0.0, "error": false, "error_type": null, @@ -25657,24 +19942,24 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.8, "tps_std": 0.0, "error": false, "error_type": null, @@ -25685,24 +19970,24 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 813.23, + "tps_mean": 917.22, "tps_std": 0.99, "error": false, "error_type": null, @@ -25713,23 +19998,359 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", + "tps_mean": 14.28, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 394.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 882.81, + "tps_std": 0.63, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 302.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 891.69, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 306.94, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.03, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 913.03, + "tps_std": 1.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", "tps_mean": 14.26, "tps_std": 0.0, "error": false, @@ -25741,11 +20362,95 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 399.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 917.06, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25755,10 +20460,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 252.49, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.27, "tps_std": 0.0, "error": false, "error_type": null, @@ -25769,11 +20474,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25783,10 +20488,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 391.45, "tps_std": 0.0, "error": false, "error_type": null, @@ -25797,11 +20502,39 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25814,8 +20547,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 590.41, - "tps_std": 71.66, + "tps_mean": 690.02, + "tps_std": 1.72, "error": false, "error_type": null, "backend": "Vulkan", @@ -25828,8 +20561,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25842,7 +20575,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 14.51, + "tps_mean": 14.56, "tps_std": 0.0, "error": false, "error_type": null, @@ -25856,8 +20589,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25867,10 +20600,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 17.29, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 54.64, "tps_std": 0.0, "error": false, "error_type": null, @@ -25881,11 +20614,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25895,10 +20628,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 11.88, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -25909,11 +20642,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25926,8 +20659,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 533.84, - "tps_std": 0.83, + "tps_mean": 549.78, + "tps_std": 1.79, "error": false, "error_type": null, "backend": "Vulkan", @@ -25940,8 +20673,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25954,7 +20687,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 13.99, + "tps_mean": 13.95, "tps_std": 0.0, "error": false, "error_type": null, @@ -25968,8 +20701,8 @@ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -25979,10 +20712,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 219.21, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -25993,11 +20726,11 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26007,10 +20740,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.99, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 11.42, "tps_std": 0.0, "error": false, "error_type": null, @@ -26021,25 +20754,25 @@ "file_size_gib": 13.4, "name_params_b": 11.77, "quant": "Q8_K_XL", - "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 439.12, - "tps_std": 31.65, + "tps_mean": 468.46, + "tps_std": 1.81, "error": false, "error_type": null, "backend": "ROCm", @@ -26049,19 +20782,19 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved", + "env": "rocm-7alpha-rocwmma", "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "env_variant": "7alpha-rocwmma", "fa": true, "context": "default", "context_tokens": null, @@ -26077,207 +20810,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 112.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 513.7, - "tps_std": 0.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.01, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 115.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 428.59, - "tps_std": 48.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26287,10 +20824,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 167.6, "tps_std": 0.0, "error": false, "error_type": null, @@ -26301,11 +20838,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26315,10 +20852,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 93.71, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.56, "tps_std": 0.0, "error": false, "error_type": null, @@ -26329,39 +20866,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.11, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26374,231 +20883,7 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 527.23, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 95.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 409.77, - "tps_std": 60.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 4.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 192.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 528.54, + "tps_mean": 522.97, "tps_std": 0.37, "error": false, "error_type": null, @@ -26609,19 +20894,131 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-hblt0", + "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", - "env_variant": "7alpha-hblt0", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 170.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 477.16, + "tps_std": 1.86, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", "fa": true, "context": "default", "context_tokens": null, @@ -26637,11 +21034,95 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 237.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 527.33, + "tps_std": 1.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26651,90 +21132,6 @@ "env_base": "rocm", "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 199.09, - "tps_std": 3.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 469.22, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 27.01, - "file_size_gib": 50.31, - "name_params_b": 27.01, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, "context": "default", "context_tokens": null, "test": "tg128", @@ -26749,11 +21146,95 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 270.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 468.3, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26763,10 +21244,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 92.1, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -26777,11 +21258,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26791,10 +21272,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.42, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 167.49, "tps_std": 0.0, "error": false, "error_type": null, @@ -26805,11 +21286,39 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26822,8 +21331,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 524.21, - "tps_std": 1.37, + "tps_mean": 525.67, + "tps_std": 0.68, "error": false, "error_type": null, "backend": "ROCm", @@ -26836,8 +21345,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26850,7 +21359,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.01, + "tps_mean": 4.0, "tps_std": 0.0, "error": false, "error_type": null, @@ -26864,8 +21373,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26875,10 +21384,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 93.57, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 169.41, "tps_std": 0.0, "error": false, "error_type": null, @@ -26889,11 +21398,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26903,10 +21412,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.42, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.71, "tps_std": 0.0, "error": false, "error_type": null, @@ -26917,11 +21426,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26934,8 +21443,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 472.47, - "tps_std": 0.58, + "tps_mean": 472.62, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -26948,8 +21457,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26976,8 +21485,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -26987,10 +21496,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 182.47, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 246.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -27001,11 +21510,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27015,10 +21524,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.72, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -27029,11 +21538,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27046,8 +21555,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 530.73, - "tps_std": 0.34, + "tps_mean": 530.96, + "tps_std": 0.63, "error": false, "error_type": null, "backend": "ROCm", @@ -27060,8 +21569,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27088,8 +21597,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27099,10 +21608,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 204.11, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 285.15, "tps_std": 0.0, "error": false, "error_type": null, @@ -27113,11 +21622,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27127,10 +21636,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.72, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.86, "tps_std": 0.0, "error": false, "error_type": null, @@ -27141,25 +21650,25 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 463.62, - "tps_std": 0.34, + "tps_mean": 463.74, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", @@ -27169,24 +21678,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -27197,24 +21706,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 113.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 193.84, "tps_std": 0.0, "error": false, "error_type": null, @@ -27225,24 +21734,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.43, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.75, "tps_std": 0.0, "error": false, "error_type": null, @@ -27253,25 +21762,25 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 522.69, - "tps_std": 0.87, + "tps_mean": 522.71, + "tps_std": 0.55, "error": false, "error_type": null, "backend": "ROCm", @@ -27281,24 +21790,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -27309,24 +21818,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 115.46, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 205.34, "tps_std": 0.0, "error": false, "error_type": null, @@ -27337,24 +21846,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.43, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -27365,25 +21874,25 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 434.79, - "tps_std": 46.53, + "tps_mean": 466.55, + "tps_std": 0.52, "error": false, "error_type": null, "backend": "ROCm", @@ -27393,24 +21902,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -27421,24 +21930,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 179.1, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 234.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -27449,24 +21958,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -27477,25 +21986,25 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 524.39, - "tps_std": 1.39, + "tps_mean": 526.17, + "tps_std": 0.74, "error": false, "error_type": null, "backend": "ROCm", @@ -27505,24 +22014,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -27533,24 +22042,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 195.6, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 269.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -27561,24 +22070,24 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -27589,11 +22098,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27606,8 +22115,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 463.45, - "tps_std": 0.58, + "tps_mean": 462.78, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -27620,8 +22129,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27648,8 +22157,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27659,10 +22168,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 111.71, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 194.64, "tps_std": 0.0, "error": false, "error_type": null, @@ -27673,11 +22182,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27687,10 +22196,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -27701,11 +22210,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27718,8 +22227,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 499.44, - "tps_std": 0.09, + "tps_mean": 499.88, + "tps_std": 0.71, "error": false, "error_type": null, "backend": "ROCm", @@ -27732,8 +22241,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27760,8 +22269,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27771,10 +22280,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 115.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 195.11, "tps_std": 0.0, "error": false, "error_type": null, @@ -27785,11 +22294,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27799,10 +22308,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -27813,38 +22322,13 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, - { - "model": "gemma-3-27b-it-BF16-00001-of-00002", - "model_clean": "gemma-3-27b-it-BF16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": false, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": false, - "error_type": null, - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log", - "rpc": false, - "build": null - }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -27855,8 +22339,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 470.06, - "tps_std": 0.56, + "tps_mean": 474.02, + "tps_std": 0.22, "error": false, "error_type": null, "backend": "ROCm", @@ -27869,8 +22353,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27883,7 +22367,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 4.04, + "tps_mean": 4.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -27897,8 +22381,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27908,10 +22392,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 177.69, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 308.72, "tps_std": 0.0, "error": false, "error_type": null, @@ -27922,11 +22406,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27936,10 +22420,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.85, "tps_std": 0.0, "error": false, "error_type": null, @@ -27950,11 +22434,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -27967,8 +22451,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 501.79, - "tps_std": 0.45, + "tps_mean": 506.0, + "tps_std": 0.53, "error": false, "error_type": null, "backend": "ROCm", @@ -27981,8 +22465,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28009,8 +22493,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28020,10 +22504,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 198.39, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 320.96, "tps_std": 0.0, "error": false, "error_type": null, @@ -28034,11 +22518,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28048,10 +22532,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.74, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.89, "tps_std": 0.0, "error": false, "error_type": null, @@ -28062,11 +22546,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28101,8 +22585,8 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, + "context": "longctx16384", + "context_tokens": 16384, "test": null, "tps_mean": null, "tps_std": null, @@ -28115,7 +22599,7 @@ "file_size_gib": null, "name_params_b": null, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": null }, @@ -28129,8 +22613,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 106.82, - "tps_std": 1.0, + "tps_mean": 107.99, + "tps_std": 1.5, "error": false, "error_type": null, "backend": "Vulkan", @@ -28143,8 +22627,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28157,7 +22641,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 3.92, + "tps_mean": 3.93, "tps_std": 0.0, "error": false, "error_type": null, @@ -28171,8 +22655,8 @@ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28182,10 +22666,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 62.49, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 67.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -28196,11 +22680,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28210,10 +22694,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.63, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 3.76, "tps_std": 0.0, "error": false, "error_type": null, @@ -28224,235 +22708,11 @@ "file_size_gib": 50.31, "name_params_b": 27.01, "quant": "BF16", - "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2224.91, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 75.58, - "tps_std": 9.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1239.19, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2106.39, - "tps_std": 2.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 84.35, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1185.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28465,8 +22725,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2245.25, - "tps_std": 4.85, + "tps_mean": 2767.54, + "tps_std": 1.34, "error": false, "error_type": null, "backend": "ROCm", @@ -28479,8 +22739,8 @@ "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -28493,399 +22753,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 75.82, - "tps_std": 8.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1224.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2111.57, - "tps_std": 5.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 81.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1190.6, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2256.38, - "tps_std": 8.98, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 84.67, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1206.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2154.09, - "tps_std": 4.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 84.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1291.24, - "tps_std": 6.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 59.61, + "tps_mean": 81.17, "tps_std": 0.03, "error": false, "error_type": null, @@ -28896,25 +22764,81 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx32768.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1439.14, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 68.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2283.48, - "tps_std": 2.94, + "tps_mean": 2762.69, + "tps_std": 4.25, "error": false, "error_type": null, "backend": "ROCm", @@ -28924,248 +22848,24 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 78.74, - "tps_std": 0.13, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 898.63, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2154.45, - "tps_std": 10.83, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 76.62, - "tps_std": 3.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 855.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 57.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2276.8, - "tps_std": 11.52, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.07, + "tps_mean": 81.08, "tps_std": 0.02, "error": false, "error_type": null, @@ -29176,1752 +22876,72 @@ "file_size_gib": 1.8, "name_params_b": 3.88, "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1497.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2161.24, - "tps_std": 6.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 82.35, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1440.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2269.02, - "tps_std": 4.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 59.93, - "tps_std": 6.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1031.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.77, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2141.35, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 64.63, - "tps_std": 11.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1002.59, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2261.65, - "tps_std": 12.88, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 62.69, - "tps_std": 7.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1160.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2171.0, - "tps_std": 3.48, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 65.68, - "tps_std": 10.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1240.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2257.61, - "tps_std": 5.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 78.84, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1046.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.97, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2102.34, - "tps_std": 8.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 67.4, - "tps_std": 10.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1033.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2275.52, - "tps_std": 10.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.45, - "tps_std": 10.56, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1168.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 2114.7, - "tps_std": 2.89, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 68.07, - "tps_std": 12.18, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 1227.13, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 58.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1229.75, - "tps_std": 236.47, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 75.94, - "tps_std": 2.23, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 145.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 64.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1116.46, - "tps_std": 204.92, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 78.27, - "tps_std": 2.29, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 646.29, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gemma-3-4b-it-Q3_K_S", - "model_clean": "gemma-3-4b-it-Q3_K_S", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 45.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 3.88, - "file_size_gib": 1.8, - "name_params_b": 3.88, - "quant": "Q3_K_S", - "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 669.82, - "tps_std": 4.74, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 325.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 673.38, - "tps_std": 9.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 332.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 656.31, - "tps_std": 30.59, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 223.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 680.04, - "tps_std": 3.32, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.34, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1442.24, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", "env": "rocm-7alpha-rocwmma-hblt0", "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 224.06, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 68.87, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", "env": "rocm-7alpha", "env_base": "rocm", "env_variant": "7alpha", @@ -30929,587 +22949,587 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 667.33, - "tps_std": 4.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.43, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 262.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 674.51, - "tps_std": 4.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 335.42, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 778.24, - "tps_std": 5.71, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.19, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 301.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 783.56, - "tps_std": 11.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.18, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 276.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 16.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 779.12, - "tps_std": 1.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 36.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 335.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 774.77, + "tps_mean": 2759.74, "tps_std": 13.26, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.86, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1331.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 71.38, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2748.02, + "tps_std": 15.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1341.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 71.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2907.52, + "tps_std": 4.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.61, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1365.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 66.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2902.86, + "tps_std": 2.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.68, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1384.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 66.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2868.25, + "tps_std": 16.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 80.93, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1413.39, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 67.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2874.9, + "tps_std": 17.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", "env": "rocm6_4_4-hblt0", "env_base": "rocm6_4_4", "env_variant": "hblt0", @@ -31517,1422 +23537,1198 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 37.2, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 454.32, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 30.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 769.93, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.36, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 223.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 771.12, - "tps_std": 3.66, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 226.41, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.75, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 757.6, - "tps_std": 0.73, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 270.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 29.89, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 763.2, - "tps_std": 10.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 283.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 29.9, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 775.86, - "tps_std": 2.82, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.4, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 225.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 776.83, - "tps_std": 4.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 223.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 13.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 750.63, - "tps_std": 5.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 281.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 29.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 756.08, - "tps_std": 9.81, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 37.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 284.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 29.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 720.94, - "tps_std": 1.15, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 35.76, + "tps_mean": 81.07, "tps_std": 0.02, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 166.61, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1414.92, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.02, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 68.13, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2812.03, + "tps_std": 15.7, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.66, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1347.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 66.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2820.5, + "tps_std": 10.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 78.66, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1390.56, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 67.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 513.71, - "tps_std": 2.7, + "tps_mean": 2853.13, + "tps_std": 21.11, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 34.86, + "tps_mean": 81.93, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1368.5, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 68.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2846.23, + "tps_std": 16.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 81.96, "tps_std": 0.01, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 157.1, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1361.15, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-F16", - "model_clean": "gpt-oss-120b-F16", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.67, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 67.99, "tps_std": 0.0, "error": false, "error_type": null, - "backend": "Vulkan", + "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 60.87, - "name_params_b": 116.83, - "quant": "F16", - "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 673.11, - "tps_std": 6.92, + "tps_mean": 2843.92, + "tps_std": 0.49, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.11, - "tps_std": 0.0, + "tps_mean": 78.68, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 331.82, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1377.32, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 36.0, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 66.63, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "31df4608", - "number": "7038" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 675.94, - "tps_std": 0.23, + "tps_mean": 2841.34, + "tps_std": 6.05, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.09, - "tps_std": 0.0, + "tps_mean": 78.55, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 326.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1340.85, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 27.34, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 67.21, "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2888.98, + "tps_std": 3.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 81.99, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1300.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 65.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2874.43, + "tps_std": 2.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 81.99, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 1332.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 67.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1798.72, + "tps_std": 4.5, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 90.73, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 466.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 74.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1633.15, + "tps_std": 4.31, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 85.91, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 776.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 58.76, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -32945,8 +24741,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 665.6, - "tps_std": 9.61, + "tps_mean": 274.17, + "tps_std": 2.38, "error": false, "error_type": null, "backend": "ROCm", @@ -32959,8 +24755,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -32973,118 +24769,6 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.94, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 224.65, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.73, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 639.77, - "tps_std": 57.43, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 51.85, "tps_std": 0.0, "error": false, @@ -33096,11 +24780,95 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 303.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 29.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 276.82, + "tps_std": 4.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33110,318 +24878,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 224.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.84, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 652.18, - "tps_std": 8.35, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 254.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 657.18, - "tps_std": 7.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 52.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 336.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 767.82, - "tps_std": 6.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.71, + "tps_mean": 51.95, "tps_std": 0.01, "error": false, "error_type": null, @@ -33432,11 +24892,319 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 303.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 29.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 274.29, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.18, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 301.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha", + "env_base": "rocm", + "env_variant": "7alpha", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 274.47, + "tps_std": 1.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.23, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 289.74, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.82, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 657.63, + "tps_std": 7.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33446,10 +25214,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 301.9, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.65, "tps_std": 0.0, "error": false, "error_type": null, @@ -33460,11 +25228,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33474,10 +25242,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.75, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 479.02, "tps_std": 0.0, "error": false, "error_type": null, @@ -33488,11 +25256,39 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.15, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33505,8 +25301,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 782.34, - "tps_std": 9.39, + "tps_mean": 649.55, + "tps_std": 10.69, "error": false, "error_type": null, "backend": "ROCm", @@ -33519,8 +25315,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33533,7 +25329,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.76, + "tps_mean": 51.68, "tps_std": 0.0, "error": false, "error_type": null, @@ -33547,8 +25343,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33558,10 +25354,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 293.43, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 480.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -33572,11 +25368,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33586,10 +25382,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.44, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -33600,11 +25396,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33617,8 +25413,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 762.33, - "tps_std": 0.82, + "tps_mean": 654.41, + "tps_std": 2.17, "error": false, "error_type": null, "backend": "ROCm", @@ -33631,8 +25427,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -33645,119 +25441,679 @@ "context": "default", "context_tokens": null, "test": "tg128", + "tps_mean": 51.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 470.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 45.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 658.64, + "tps_std": 9.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.93, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 472.41, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 45.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 637.48, + "tps_std": 24.73, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.66, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 412.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 653.6, + "tps_std": 10.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 409.77, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.99, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 639.37, + "tps_std": 5.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 496.89, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 646.49, + "tps_std": 8.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 501.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 44.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 645.91, + "tps_std": 13.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", "tps_mean": 51.67, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 341.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 778.37, - "tps_std": 3.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.63, "tps_std": 0.01, "error": false, "error_type": null, @@ -33768,81 +26124,81 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 358.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 39.51, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", "env_variant": "rocwmma", "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 410.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.97, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 755.98, - "tps_std": 7.49, + "tps_mean": 653.08, + "tps_std": 2.58, "error": false, "error_type": null, "backend": "ROCm", @@ -33852,19 +26208,19 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, @@ -33880,543 +26236,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 226.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.71, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 766.72, - "tps_std": 15.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 225.8, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.74, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 742.07, - "tps_std": 2.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 263.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 763.92, - "tps_std": 4.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 367.14, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 753.49, - "tps_std": 1.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.76, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 226.23, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.45, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 774.4, - "tps_std": 6.21, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34426,38 +26250,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 226.01, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 413.56, "tps_std": 0.0, "error": false, "error_type": null, @@ -34468,11 +26264,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34482,10 +26278,10 @@ "env_base": "rocm7_rc", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.73, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 28.99, "tps_std": 0.0, "error": false, "error_type": null, @@ -34496,11 +26292,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34513,8 +26309,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 746.02, - "tps_std": 1.26, + "tps_mean": 637.84, + "tps_std": 10.76, "error": false, "error_type": null, "backend": "ROCm", @@ -34527,8 +26323,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34541,7 +26337,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.2, + "tps_mean": 51.22, "tps_std": 0.01, "error": false, "error_type": null, @@ -34555,8 +26351,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34566,10 +26362,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 274.48, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 522.65, "tps_std": 0.0, "error": false, "error_type": null, @@ -34580,11 +26376,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34594,10 +26390,10 @@ "env_base": "rocm7_rc", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.72, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 43.99, "tps_std": 0.0, "error": false, "error_type": null, @@ -34608,11 +26404,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34625,8 +26421,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 752.08, - "tps_std": 9.39, + "tps_mean": 642.63, + "tps_std": 7.27, "error": false, "error_type": null, "backend": "ROCm", @@ -34639,8 +26435,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34653,7 +26449,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.38, + "tps_mean": 51.17, "tps_std": 0.01, "error": false, "error_type": null, @@ -34667,8 +26463,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34678,10 +26474,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 284.03, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 532.69, "tps_std": 0.0, "error": false, "error_type": null, @@ -34692,11 +26488,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34706,10 +26502,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 38.23, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 44.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -34720,11 +26516,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34737,8 +26533,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 759.84, - "tps_std": 2.0, + "tps_mean": 792.77, + "tps_std": 0.78, "error": false, "error_type": null, "backend": "Vulkan", @@ -34751,8 +26547,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34765,8 +26561,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 52.66, - "tps_std": 0.01, + "tps_mean": 52.34, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -34779,8 +26575,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34790,10 +26586,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 169.89, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 266.3, "tps_std": 0.0, "error": false, "error_type": null, @@ -34804,11 +26600,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34818,10 +26614,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 32.63, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 39.24, "tps_std": 0.0, "error": false, "error_type": null, @@ -34832,11 +26628,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34849,8 +26645,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 534.51, - "tps_std": 0.52, + "tps_mean": 543.66, + "tps_std": 0.88, "error": false, "error_type": null, "backend": "Vulkan", @@ -34863,8 +26659,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34877,8 +26673,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 54.67, - "tps_std": 0.1, + "tps_mean": 56.37, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "Vulkan", @@ -34891,8 +26687,8 @@ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34902,10 +26698,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 159.72, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 299.4, "tps_std": 0.0, "error": false, "error_type": null, @@ -34916,11 +26712,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -34930,10 +26726,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 37.31, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 45.88, "tps_std": 0.0, "error": false, "error_type": null, @@ -34944,2475 +26740,11 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1430.02, - "tps_std": 3.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 551.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 24.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1438.05, - "tps_std": 10.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 555.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 24.31, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1494.1, - "tps_std": 6.72, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.26, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 357.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1450.79, - "tps_std": 15.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 357.94, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1430.88, - "tps_std": 12.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 419.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha", - "env_base": "rocm", - "env_variant": "7alpha", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 24.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1399.32, - "tps_std": 6.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.35, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 556.09, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 24.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1500.8, - "tps_std": 17.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.24, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 479.16, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1440.0, - "tps_std": 14.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.28, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 478.62, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1449.04, - "tps_std": 10.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 27.86, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 619.3, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.38, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1444.02, - "tps_std": 15.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 612.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1499.09, - "tps_std": 21.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 350.48, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1448.72, - "tps_std": 23.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.37, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 352.46, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1437.87, - "tps_std": 9.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 443.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1405.44, - "tps_std": 19.51, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 454.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.1, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7.1__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1486.29, - "tps_std": 11.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 352.25, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1426.84, - "tps_std": 4.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 351.69, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 17.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1424.62, - "tps_std": 5.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.27, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 446.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.05, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1380.59, - "tps_std": 26.7, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.29, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 465.5, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 25.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 574.29, - "tps_std": 4.39, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 17.78, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 221.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 448.9, - "tps_std": 3.43, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 16.15, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 243.39, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-F32", - "model_clean": "gpt-oss-20b-F32", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 14.76, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 38.97, - "name_params_b": 20.91, - "quant": "F32", - "log": "results/gpt-oss-20b-F32__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1333.81, - "tps_std": 9.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.64, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 537.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.44, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1385.54, - "tps_std": 24.93, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 544.55, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.36, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37425,8 +26757,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1384.08, - "tps_std": 13.61, + "tps_mean": 791.2, + "tps_std": 5.92, "error": false, "error_type": null, "backend": "ROCm", @@ -37439,8 +26771,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37453,118 +26785,6 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.16, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 349.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1419.39, - "tps_std": 12.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4db63cdde", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-rocwmma-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 73.24, "tps_std": 0.01, "error": false, @@ -37576,11 +26796,95 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 539.7, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 795.34, + "tps_std": 8.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37590,10 +26894,38 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 353.6, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.28, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-rocwmma-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 538.62, "tps_std": 0.0, "error": false, "error_type": null, @@ -37604,11 +26936,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37618,10 +26950,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.87, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.21, "tps_std": 0.0, "error": false, "error_type": null, @@ -37632,11 +26964,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37649,8 +26981,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1326.8, - "tps_std": 18.91, + "tps_mean": 788.07, + "tps_std": 19.38, "error": false, "error_type": null, "backend": "ROCm", @@ -37663,8 +26995,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37677,7 +27009,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.5, + "tps_mean": 73.65, "tps_std": 0.01, "error": false, "error_type": null, @@ -37691,8 +27023,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37702,10 +27034,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 398.06, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 585.34, "tps_std": 0.0, "error": false, "error_type": null, @@ -37716,11 +27048,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37730,10 +27062,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.13, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 61.13, "tps_std": 0.0, "error": false, "error_type": null, @@ -37744,11 +27076,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37761,8 +27093,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1365.89, - "tps_std": 19.13, + "tps_mean": 777.22, + "tps_std": 12.74, "error": false, "error_type": null, "backend": "ROCm", @@ -37775,8 +27107,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37789,7 +27121,119 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.49, + "tps_mean": 73.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 533.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 61.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1752.24, + "tps_std": 12.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.82, "tps_std": 0.01, "error": false, "error_type": null, @@ -37800,95 +27244,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 456.33, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 51.03, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1510.54, - "tps_std": 2.64, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37898,10 +27258,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.95, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 796.69, "tps_std": 0.0, "error": false, "error_type": null, @@ -37912,11 +27272,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37926,10 +27286,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 474.83, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.1, "tps_std": 0.0, "error": false, "error_type": null, @@ -37940,39 +27300,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.83, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -37985,8 +27317,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1598.15, - "tps_std": 4.85, + "tps_mean": 1717.36, + "tps_std": 12.37, "error": false, "error_type": null, "backend": "ROCm", @@ -37999,8 +27331,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38013,8 +27345,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.02, - "tps_std": 0.03, + "tps_mean": 72.94, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -38027,8 +27359,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38038,10 +27370,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 477.4, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 796.57, "tps_std": 0.0, "error": false, "error_type": null, @@ -38052,11 +27384,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38066,10 +27398,10 @@ "env_base": "rocm6_4_4", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.86, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -38080,11 +27412,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38097,8 +27429,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1510.09, - "tps_std": 10.37, + "tps_mean": 1710.01, + "tps_std": 23.22, "error": false, "error_type": null, "backend": "ROCm", @@ -38111,8 +27443,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38125,7 +27457,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.09, + "tps_mean": 73.48, "tps_std": 0.0, "error": false, "error_type": null, @@ -38139,8 +27471,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38150,10 +27482,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 518.94, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 808.14, "tps_std": 0.0, "error": false, "error_type": null, @@ -38164,11 +27496,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38178,10 +27510,10 @@ "env_base": "rocm6_4_4", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.02, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 64.31, "tps_std": 0.0, "error": false, "error_type": null, @@ -38192,11 +27524,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38209,8 +27541,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1572.54, - "tps_std": 11.37, + "tps_mean": 1726.91, + "tps_std": 4.81, "error": false, "error_type": null, "backend": "ROCm", @@ -38223,8 +27555,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38237,8 +27569,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 72.96, - "tps_std": 0.02, + "tps_mean": 73.44, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -38251,8 +27583,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38262,10 +27594,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 554.2, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 817.7, "tps_std": 0.0, "error": false, "error_type": null, @@ -38276,11 +27608,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38290,10 +27622,10 @@ "env_base": "rocm6_4_4", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 56.02, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 64.37, "tps_std": 0.0, "error": false, "error_type": null, @@ -38304,25 +27636,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1511.0, - "tps_std": 19.49, + "tps_mean": 1710.28, + "tps_std": 7.42, "error": false, "error_type": null, "backend": "ROCm", @@ -38332,25 +27664,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.04, - "tps_std": 0.03, + "tps_mean": 73.12, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -38360,24 +27692,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 345.98, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 670.09, "tps_std": 0.0, "error": false, "error_type": null, @@ -38388,24 +27720,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.89, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.17, "tps_std": 0.0, "error": false, "error_type": null, @@ -38416,25 +27748,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1559.27, - "tps_std": 17.65, + "tps_mean": 1695.75, + "tps_std": 25.43, "error": false, "error_type": null, "backend": "ROCm", @@ -38444,25 +27776,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.16, - "tps_std": 0.03, + "tps_mean": 73.15, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -38472,24 +27804,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 349.08, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 668.65, "tps_std": 0.0, "error": false, "error_type": null, @@ -38500,24 +27832,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.83, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -38528,25 +27860,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1476.67, - "tps_std": 4.0, + "tps_mean": 1670.49, + "tps_std": 30.36, "error": false, "error_type": null, "backend": "ROCm", @@ -38556,136 +27888,136 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 72.53, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 418.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.47, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1526.6, - "tps_std": 21.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 72.45, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 658.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 62.69, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1661.92, + "tps_std": 6.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.41, "tps_std": 0.02, "error": false, "error_type": null, @@ -38696,24 +28028,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 431.87, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 654.36, "tps_std": 0.0, "error": false, "error_type": null, @@ -38724,24 +28056,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.67, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 62.59, "tps_std": 0.0, "error": false, "error_type": null, @@ -38752,11 +28084,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm7.1.1__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38769,8 +28101,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1507.89, - "tps_std": 10.23, + "tps_mean": 1692.63, + "tps_std": 8.33, "error": false, "error_type": null, "backend": "ROCm", @@ -38783,8 +28115,8 @@ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -38797,430 +28129,458 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 73.07, + "tps_mean": 73.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 668.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.12, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1721.79, + "tps_std": 15.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.01, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 665.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 42.13, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1668.78, + "tps_std": 30.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.33, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 633.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 62.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1674.84, + "tps_std": 21.95, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.29, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 644.71, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 62.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1965.23, + "tps_std": 21.66, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 75.24, "tps_std": 0.04, "error": false, "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 345.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.82, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1558.14, - "tps_std": 14.95, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 73.06, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 349.21, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 28.87, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1463.05, - "tps_std": 15.58, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.34, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 422.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.49, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1513.62, - "tps_std": 5.84, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 72.47, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 435.15, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 54.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1681.86, - "tps_std": 231.36, - "error": false, - "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, @@ -39231,8 +28591,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39242,38 +28602,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 75.38, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 20.91, - "file_size_gib": 11.27, - "name_params_b": 20.91, - "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-mxfp4", - "model_clean": "gpt-oss-20b-mxfp4", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 300.31, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 512.34, "tps_std": 0.0, "error": false, "error_type": null, @@ -39284,11 +28616,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39298,10 +28630,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 46.98, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 56.91, "tps_std": 0.0, "error": false, "error_type": null, @@ -39312,11 +28644,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39329,8 +28661,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1354.58, - "tps_std": 9.42, + "tps_mean": 1395.08, + "tps_std": 16.05, "error": false, "error_type": null, "backend": "Vulkan", @@ -39343,8 +28675,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39357,8 +28689,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 77.1, - "tps_std": 0.22, + "tps_mean": 79.6, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "Vulkan", @@ -39371,8 +28703,8 @@ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39382,10 +28714,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 298.1, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 467.7, "tps_std": 0.0, "error": false, "error_type": null, @@ -39396,11 +28728,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39410,10 +28742,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 52.75, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 65.33, "tps_std": 0.0, "error": false, "error_type": null, @@ -39424,285 +28756,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "gpt-oss-20b-uncensored.Q8_0", - "model_clean": "gpt-oss-20b-uncensored.Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": false, - "error_type": null, - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q8_0", - "log": "results/gpt-oss-20b-uncensored.Q8_0__rocm7_rc__fa1.log", - "rpc": false, - "build": null - }, - { - "model": "gpt-oss-20b-uncensored.Q8_0", - "model_clean": "gpt-oss-20b-uncensored.Q8_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": false, - "error_type": null, - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q8_0", - "log": "results/gpt-oss-20b-uncensored.Q8_0__rocm7_rc__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1091.87, - "tps_std": 1.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.02, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 54.85, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "31df4608", - "number": "7038" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1008.52, - "tps_std": 2.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.29, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 53.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-rocwmma-improved-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-rocwmma-improved-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.58, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma-improved__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "12bb5c37", - "number": "7074" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39715,8 +28773,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1117.58, - "tps_std": 1.52, + "tps_mean": 1556.97, + "tps_std": 0.78, "error": false, "error_type": null, "backend": "ROCm", @@ -39729,8 +28787,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39743,7 +28801,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.47, + "tps_mean": 50.65, "tps_std": 0.02, "error": false, "error_type": null, @@ -39757,8 +28815,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39768,10 +28826,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 47.68, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 94.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -39782,11 +28840,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39796,10 +28854,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.58, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -39810,11 +28868,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39827,8 +28885,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1012.09, - "tps_std": 1.56, + "tps_mean": 1561.25, + "tps_std": 2.77, "error": false, "error_type": null, "backend": "ROCm", @@ -39841,8 +28899,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39855,8 +28913,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.61, - "tps_std": 0.01, + "tps_mean": 50.59, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "ROCm", @@ -39869,8 +28927,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39880,10 +28938,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 46.59, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 94.41, "tps_std": 0.0, "error": false, "error_type": null, @@ -39894,11 +28952,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39908,10 +28966,10 @@ "env_base": "rocm", "env_variant": "7alpha-rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.58, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -39922,11 +28980,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "4db63cdde", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39939,8 +28997,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1119.14, - "tps_std": 0.89, + "tps_mean": 1572.74, + "tps_std": 2.6, "error": false, "error_type": null, "backend": "ROCm", @@ -39953,8 +29011,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39967,7 +29025,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.51, + "tps_mean": 50.65, "tps_std": 0.02, "error": false, "error_type": null, @@ -39981,8 +29039,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -39992,10 +29050,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 167.07, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 351.26, "tps_std": 0.0, "error": false, "error_type": null, @@ -40006,11 +29064,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -40020,10 +29078,10 @@ "env_base": "rocm", "env_variant": "7alpha", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.58, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.04, "tps_std": 0.0, "error": false, "error_type": null, @@ -40034,11 +29092,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -40051,8 +29109,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1017.17, - "tps_std": 2.7, + "tps_mean": 1572.88, + "tps_std": 2.44, "error": false, "error_type": null, "backend": "ROCm", @@ -40065,8 +29123,8 @@ "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "4fc43d43d", - "number": "7085" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -40079,312 +29137,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.53, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "4fc43d43d", - "number": "7085" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7alpha-hblt0", - "env_base": "rocm", - "env_variant": "7alpha-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": null, - "tps_mean": null, - "tps_std": null, - "error": true, - "error_type": "runtime", - "backend": null, - "ngl": null, - "mmap": null, - "params_b": null, - "file_size_gib": null, - "name_params_b": null, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": null - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1113.73, - "tps_std": 1.45, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.09, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.04, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1014.02, - "tps_std": 2.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.96, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 49.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-rocwmma-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1116.12, - "tps_std": 3.27, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.11, + "tps_mean": 50.64, "tps_std": 0.02, "error": false, "error_type": null, @@ -40395,24 +29148,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 186.52, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 346.35, "tps_std": 0.0, "error": false, "error_type": null, @@ -40423,24 +29176,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env": "rocm-7alpha-hblt0", + "env_base": "rocm", + "env_variant": "7alpha-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 10.03, "tps_std": 0.0, "error": false, "error_type": null, @@ -40451,137 +29204,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7alpha__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", + "env": "rocm6_4_4-rocwmma", "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1021.92, - "tps_std": 1.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.07, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 188.56, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm6_4_4-hblt0", - "env_base": "rocm6_4_4", - "env_variant": "hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "1c398dc9e", - "number": "7034" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1100.61, - "tps_std": 4.23, + "tps_mean": 1525.39, + "tps_std": 0.85, "error": false, "error_type": null, "backend": "ROCm", @@ -40591,247 +29232,23 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", "env_variant": "rocwmma", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.08, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 70.66, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1007.54, - "tps_std": 4.33, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.04, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 70.57, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.92, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1-rocwmma__hblt0__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1107.98, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", - "rpc": false, - "build": { - "hash": "ee8dd5c65", - "number": "7035" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", "tps_mean": 51.05, "tps_std": 0.02, "error": false, @@ -40843,24 +29260,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 171.2, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 104.91, "tps_std": 0.0, "error": false, "error_type": null, @@ -40871,24 +29288,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1", - "env_base": "rocm7.1", - "env_variant": null, + "env": "rocm6_4_4-rocwmma", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.08, "tps_std": 0.0, "error": false, "error_type": null, @@ -40899,25 +29316,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1015.07, - "tps_std": 2.17, + "tps_mean": 1524.22, + "tps_std": 2.19, "error": false, "error_type": null, "backend": "ROCm", @@ -40927,24 +29344,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.0, + "tps_mean": 51.02, "tps_std": 0.02, "error": false, "error_type": null, @@ -40955,24 +29372,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.55, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 106.82, "tps_std": 0.0, "error": false, "error_type": null, @@ -40983,24 +29400,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", - "env_variant": "hblt0", + "env": "rocm6_4_4-rocwmma-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "rocwmma-hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -41011,25 +29428,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7.1__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4-rocwmma__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "ee8dd5c65", - "number": "7035" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1100.81, - "tps_std": 1.25, + "tps_mean": 1539.28, + "tps_std": 0.84, "error": false, "error_type": null, "backend": "ROCm", @@ -41039,136 +29456,248 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.0, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 69.72, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma", - "env_base": "rocm7_rc", - "env_variant": "rocwmma", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 963.09, - "tps_std": 2.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", - "rpc": false, - "build": { - "hash": "bca95ca51", - "number": "7036" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", "tps_mean": 51.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 384.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1538.89, + "tps_std": 3.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 51.07, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 382.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4-hblt0", + "env_base": "rocm6_4_4", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 12.07, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1492.67, + "tps_std": 1.4, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.89, "tps_std": 0.01, "error": false, "error_type": null, @@ -41179,24 +29708,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 71.68, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 141.66, "tps_std": 0.0, "error": false, "error_type": null, @@ -41207,24 +29736,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-rocwmma-hblt0", - "env_base": "rocm7_rc", - "env_variant": "rocwmma-hblt0", + "env": "rocm7.1.1-rocwmma", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.37, "tps_std": 0.0, "error": false, "error_type": null, @@ -41235,25 +29764,137 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "bca95ca51", - "number": "7036" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1495.58, + "tps_std": 2.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.97, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 141.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-rocwmma-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1113.49, - "tps_std": 3.85, + "tps_mean": 1526.32, + "tps_std": 2.1, "error": false, "error_type": null, "backend": "ROCm", @@ -41263,25 +29904,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 51.02, - "tps_std": 0.02, + "tps_mean": 50.96, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -41291,24 +29932,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 177.96, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 348.77, "tps_std": 0.0, "error": false, "error_type": null, @@ -41319,24 +29960,24 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc", - "env_base": "rocm7_rc", + "env": "rocm7.1.1", + "env_base": "rocm7.1.1", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.92, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.34, "tps_std": 0.0, "error": false, "error_type": null, @@ -41347,25 +29988,25 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 968.65, - "tps_std": 2.86, + "tps_mean": 1530.07, + "tps_std": 0.42, "error": false, "error_type": null, "backend": "ROCm", @@ -41375,18 +30016,18 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm7_rc-hblt0", - "env_base": "rocm7_rc", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -41403,11 +30044,431 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 348.73, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7.1.1-hblt0", + "env_base": "rocm7.1.1", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7.1.1__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1493.41, + "tps_std": 1.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.9, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 139.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma", + "env_base": "rocm7_rc", + "env_variant": "rocwmma", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1494.79, + "tps_std": 2.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.91, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 139.47, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-rocwmma-hblt0", + "env_base": "rocm7_rc", + "env_variant": "rocwmma-hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc-rocwmma__hblt0__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1529.76, + "tps_std": 1.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.85, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 350.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc", + "env_base": "rocm7_rc", + "env_variant": null, + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.35, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__fa1__longctx16384.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1530.14, + "tps_std": 1.62, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41417,10 +30478,38 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 173.34, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.88, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "2aa45ef9e", + "number": "7423" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7_rc-hblt0", + "env_base": "rocm7_rc", + "env_variant": "hblt0", + "fa": true, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 347.7, "tps_std": 0.0, "error": false, "error_type": null, @@ -41431,11 +30520,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41445,10 +30534,10 @@ "env_base": "rocm7_rc", "env_variant": "hblt0", "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.91, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 13.35, "tps_std": 0.0, "error": false, "error_type": null, @@ -41459,11 +30548,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm7_rc__hblt0__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41476,8 +30565,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1378.42, - "tps_std": 1.37, + "tps_mean": 1494.56, + "tps_std": 4.36, "error": false, "error_type": null, "backend": "Vulkan", @@ -41490,8 +30579,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41504,7 +30593,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 53.41, + "tps_mean": 56.03, "tps_std": 0.06, "error": false, "error_type": null, @@ -41518,8 +30607,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41529,10 +30618,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 101.79, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 196.22, "tps_std": 0.0, "error": false, "error_type": null, @@ -41543,11 +30632,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41557,10 +30646,10 @@ "env_base": "vulkan_amdvlk", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 9.25, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 15.95, "tps_std": 0.0, "error": false, "error_type": null, @@ -41571,11 +30660,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41588,8 +30677,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1094.33, - "tps_std": 3.06, + "tps_mean": 1135.49, + "tps_std": 4.16, "error": false, "error_type": null, "backend": "Vulkan", @@ -41602,8 +30691,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41616,8 +30705,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 53.65, - "tps_std": 0.01, + "tps_mean": 55.73, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -41630,8 +30719,8 @@ "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41641,10 +30730,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 174.6, + "context": "longctx16384", + "context_tokens": 16384, + "test": "pp2048 @ d16384", + "tps_mean": 294.01, "tps_std": 0.0, "error": false, "error_type": null, @@ -41655,11 +30744,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -41669,10 +30758,10 @@ "env_base": "vulkan_radv", "env_variant": null, "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 8.7, + "context": "longctx16384", + "context_tokens": 16384, + "test": "tg32 @ d16384", + "tps_mean": 15.16, "tps_std": 0.0, "error": false, "error_type": null, @@ -41683,11 +30772,11 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx16384.log", "rpc": false, "build": { - "hash": "1c398dc9e", - "number": "7034" + "hash": "2aa45ef9e", + "number": "7423" } }, { @@ -42141,8 +31230,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "longctx32768", @@ -42169,8 +31258,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "longctx32768", @@ -42197,8 +31286,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", @@ -42225,8 +31314,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", @@ -42253,8 +31342,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "longctx32768", @@ -42281,8 +31370,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "longctx32768", @@ -42309,8 +31398,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -42337,8 +31426,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -42365,8 +31454,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": false, "context": "longctx32768", @@ -42390,8 +31479,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -42418,8 +31507,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -42446,8 +31535,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": false, "context": "longctx32768", @@ -42471,8 +31560,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "default", @@ -42499,8 +31588,8 @@ { "model": "GLM-4.6-UD-Q4_K_XL-00001-of-00005", "model_clean": "GLM-4.6-UD-Q4_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "default", @@ -43971,8 +33060,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "longctx32768", @@ -43999,8 +33088,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "longctx32768", @@ -44027,8 +33116,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", @@ -44055,8 +33144,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma-hblt0", + "env_base": "rocm7_1", "env_variant": "rocwmma-hblt0", "fa": true, "context": "default", @@ -44083,8 +33172,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "longctx32768", @@ -44111,8 +33200,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "longctx32768", @@ -44139,8 +33228,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -44167,8 +33256,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-rocwmma", - "env_base": "rocm7.1", + "env": "rocm7_1-rocwmma", + "env_base": "rocm7_1", "env_variant": "rocwmma", "fa": true, "context": "default", @@ -44195,8 +33284,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "longctx32768", @@ -44223,8 +33312,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "longctx32768", @@ -44251,8 +33340,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -44279,8 +33368,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1-hblt0", - "env_base": "rocm7.1", + "env": "rocm7_1-hblt0", + "env_base": "rocm7_1", "env_variant": "hblt0", "fa": true, "context": "default", @@ -44307,8 +33396,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "longctx32768", @@ -44335,8 +33424,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "longctx32768", @@ -44363,8 +33452,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "default", @@ -44391,8 +33480,8 @@ { "model": "MiniMax-M2-UD-Q6_K_XL-00001-of-00004", "model_clean": "MiniMax-M2-UD-Q6_K_XL", - "env": "rocm7.1", - "env_base": "rocm7.1", + "env": "rocm7_1", + "env_base": "rocm7_1", "env_variant": null, "fa": true, "context": "default",