diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..6fe8641 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 66.52 ± 7.27 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.76 ± 0.08 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..f2148c0 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.57 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.08 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..648542a --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | pp512 | 71.47 ± 0.20 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | tg128 | 2.82 ± 0.07 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..2892f38 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 16.47 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.06 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..f3637a5 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 133.48 ± 0.41 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 22.52 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..c5e6271 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 34.18 ± 0.22 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 8.12 ± 0.14 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..cb1d652 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 181.28 ± 1.15 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 22.65 ± 0.06 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..943b26b --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 35.28 ± 0.24 | +| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.97 ± 0.43 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log new file mode 100644 index 0000000..6d916e4 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 56.12 ± 0.13 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 16.60 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..4c9a183 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 22.87 ± 0.07 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 6.89 ± 0.67 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..79e4936 --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | pp512 | 105.50 ± 0.54 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | tg128 | 16.65 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..6020dcf --- /dev/null +++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 32.93 ± 0.13 | +| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.24 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..eb98cfd --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.50 ± 0.01 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.78 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..6c2824c --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 27.87 ± 0.75 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.42 ± 0.06 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..1533579 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | pp512 | 49.27 ± 0.02 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | tg128 | 2.79 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c15e681 --- /dev/null +++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 28.46 ± 0.36 | +| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 2.44 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log new file mode 100644 index 0000000..921c345 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 797.13 ± 2.39 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9dfec45 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 146.47 ± 5.52 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..0832956 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | pp512 | 803.39 ± 2.22 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | tg128 | 8.49 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..91e85e1 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 152.56 ± 6.51 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 7.11 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a25472a --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 393.57 ± 2.37 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.69 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..7ccfdf7 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 559.16 ± 0.99 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.74 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..eaea7c6 --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | pp512 | 389.10 ± 3.02 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 1 | tg128 | 41.68 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d0ae54b --- /dev/null +++ b/benchmark/results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 558.52 ± 1.33 | +| nemotron_h_moe 31B.A3.5B Q8_0 | 37.66 GiB | 31.58 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 39.73 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log new file mode 100644 index 0000000..8ce6864 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 178.32 ± 26.83 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.97 ± 0.98 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9777216 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 38.11 ± 0.36 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.90 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..ce3e316 --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | pp512 | 167.63 ± 28.76 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | tg128 | 12.73 ± 0.74 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..929e46f --- /dev/null +++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 37.92 ± 0.33 | +| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 9.78 ± 0.20 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..3aa8493 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 484.09 ± 10.61 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.12 ± 0.16 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..4f16aaf --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 162.38 ± 4.20 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 18.71 ± 1.16 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..50fd7e6 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 492.54 ± 2.48 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 27.09 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..3bcfa1d --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.72 ± 4.90 | +| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 19.36 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a326a57 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 813.78 ± 5.52 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..a38fcee --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 154.84 ± 3.34 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.32 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..dba8fa9 --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 789.10 ± 47.98 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 58.51 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..49dce2c --- /dev/null +++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 155.23 ± 3.28 | +| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 31.27 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log new file mode 100644 index 0000000..06add3d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1207.32 ± 7.42 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.51 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..3022aa0 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 161.97 ± 3.98 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.41 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..101bd0a --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | pp512 | 1096.05 ± 129.46 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | tg128 | 71.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..54589c8 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 160.95 ± 3.41 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 34.37 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..fd2dcb3 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 211.96 ± 2.48 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.74 ± 0.49 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..e9297f0 --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 239.82 ± 0.51 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 24.76 ± 1.87 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..fe3532d --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | pp512 | 262.57 ± 3.77 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 1 | tg128 | 28.94 ± 0.05 | + +build: a14b960bc (7816) diff --git a/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d6b3f2b --- /dev/null +++ b/benchmark/results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 295.41 ± 0.37 | +| qwen3next 80B.A3B Q8_0 | 86.68 GiB | 79.67 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 23.85 ± 3.58 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log new file mode 100644 index 0000000..a2749f2 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 323.22 ± 0.21 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.24 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..9f14bc9 --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 231.95 ± 3.74 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..5cc6d4b --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | pp512 | 324.04 ± 0.14 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | tg128 | 14.25 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..d9d363a --- /dev/null +++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 230.54 ± 3.36 | +| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 11.66 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log new file mode 100644 index 0000000..e6e1d62 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 430.73 ± 1.02 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 3.86 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..f7bcf1c --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 193.46 ± 0.50 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.69 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..02273a2 --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | pp512 | 525.55 ± 1.67 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | tg128 | 4.00 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..76656be --- /dev/null +++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 202.49 ± 3.52 | +| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 3.72 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log new file mode 100644 index 0000000..83da529 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2858.32 ± 17.99 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 84.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..5def9e8 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1182.57 ± 31.53 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..455b875 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | pp512 | 2679.11 ± 228.92 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | tg128 | 70.08 ± 4.20 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..ebaaee2 --- /dev/null +++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 1138.90 ± 19.11 | +| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 61.59 ± 0.04 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log new file mode 100644 index 0000000..2299f09 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.34 ± 1.51 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.25 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..6275774 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 217.76 ± 0.23 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.60 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..c90c1af --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | pp512 | 181.33 ± 1.42 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | tg128 | 52.27 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..c7a144b --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 215.89 ± 4.16 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 36.64 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log new file mode 100644 index 0000000..4b54d27 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 548.07 ± 6.87 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.66 ± 0.02 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..ae41eeb --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.62 ± 3.36 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..9aea9e3 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | pp512 | 546.89 ± 7.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | tg128 | 73.57 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..9029d6a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 337.07 ± 4.50 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 52.54 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log new file mode 100644 index 0000000..4834219 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.23 ± 0.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.62 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log new file mode 100644 index 0000000..fa012c7 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 139.39 ± 0.75 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.59 ± 0.00 | + +build: a14b960bc (7816) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log new file mode 100644 index 0000000..58b1ece --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | pp512 | 549.22 ± 0.08 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | tg128 | 50.67 ± 0.01 | + +build: a14b960bc (7816) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log new file mode 100644 index 0000000..1f2b6d2 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices: + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32 +| model | size | params | backend | ngl | n_ubatch | fa | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | pp2048 @ d32768 | 137.30 ± 1.39 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | tg32 @ d32768 | 5.60 ± 0.00 | + +build: a14b960bc (7816) diff --git a/docs/results.json b/docs/results.json index d36f20e..824d7dd 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-01-12T13:18:12Z", + "generated_at": "2026-01-23T15:10:46Z", "system_info": { "distro": "Fedora Linux 42 (Workstation Edition)", "kernel": "6.18.3-100.fc42.x86_64", @@ -15,9 +15,15 @@ { "hash": "9c142e3a2", "number": "7670" + }, + { + "hash": "a14b960bc", + "number": "7816" } ], "environments": [ + "rocm-7.2", + "rocm-7.2-hblt0", "rocm6_4_4", "rocm6_4_4-hblt0", "rocm7-nightlies", @@ -31,6 +37,230 @@ "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" }, "runs": [ + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 66.52, + "tps_std": 7.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.76, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.57, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 71.47, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.82, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.47, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.06, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -896,6 +1126,230 @@ "number": "7670" } }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 133.48, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.52, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 34.18, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.12, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 181.28, + "tps_std": 1.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.65, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 35.28, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", + "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.97, + "tps_std": 0.43, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 68.01, + "name_params_b": 110.47, + "quant": "Q4_K_XL", + "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002", "model_clean": "GLM-4.5-Air-UD-Q4_K_XL", @@ -1792,6 +2246,230 @@ "number": "7670" } }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 56.12, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.6, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 22.87, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.89, + "tps_std": 0.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 105.5, + "tps_std": 0.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.65, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 32.93, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", + "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.24, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 110.47, + "file_size_gib": 94.57, + "name_params_b": 110.47, + "quant": "Q6_K_XL", + "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003", "model_clean": "GLM-4.5-Air-UD-Q6_K_XL", @@ -2688,6 +3366,230 @@ "number": "7670" } }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 49.5, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 27.87, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.42, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 49.27, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.79, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 28.46, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.44, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 70.55, + "file_size_gib": 75.65, + "name_params_b": 70.55, + "quant": "Q8_K_XL", + "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL", @@ -3584,6 +4486,230 @@ "number": "7670" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 797.13, + "tps_std": 2.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 146.47, + "tps_std": 5.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 803.39, + "tps_std": 2.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 152.56, + "tps_std": 6.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -4480,6 +5606,230 @@ "number": "7670" } }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 393.57, + "tps_std": 2.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 559.16, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.74, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 389.1, + "tps_std": 3.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 558.52, + "tps_std": 1.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.73, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 31.58, + "file_size_gib": 37.66, + "name_params_b": 31.58, + "quant": "Q8_K_XL", + "log": "results/Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", "model_clean": "Nemotron-3-Nano-30B-A3B-UD-Q8_K_XL", @@ -5376,6 +6726,230 @@ "number": "7670" } }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 178.32, + "tps_std": 26.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.97, + "tps_std": 0.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 38.11, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.9, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 167.63, + "tps_std": 28.76, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 12.73, + "tps_std": 0.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 37.92, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", + "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 9.78, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 235.09, + "file_size_gib": 96.99, + "name_params_b": 235.09, + "quant": "Q3_K_XL", + "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003", "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL", @@ -6241,6 +7815,230 @@ "number": "7670" } }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 484.09, + "tps_std": 10.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.12, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 162.38, + "tps_std": 4.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 18.71, + "tps_std": 1.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 492.54, + "tps_std": 2.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 27.09, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 161.72, + "tps_std": 4.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3-30B-A3B-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 56.89, + "name_params_b": 30.53, + "quant": "BF16", + "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Qwen3-30B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3-30B-A3B-BF16", @@ -7137,6 +8935,230 @@ "number": "7670" } }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 813.78, + "tps_std": 5.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 154.84, + "tps_std": 3.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.32, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 789.1, + "tps_std": 47.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 155.23, + "tps_std": 3.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 31.27, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 24.53, + "name_params_b": 30.53, + "quant": "Q6_K_XL", + "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL", @@ -8033,6 +10055,230 @@ "number": "7670" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1207.32, + "tps_std": 7.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 161.97, + "tps_std": 3.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.41, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1096.05, + "tps_std": 129.46, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 71.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 160.95, + "tps_std": 3.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 34.37, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -8929,6 +11175,230 @@ "number": "7670" } }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 211.96, + "tps_std": 2.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.74, + "tps_std": 0.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 239.82, + "tps_std": 0.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 24.76, + "tps_std": 1.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 262.57, + "tps_std": 3.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 28.94, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 295.41, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", + "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 23.85, + "tps_std": 3.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 79.67, + "file_size_gib": 86.68, + "name_params_b": 79.67, + "quant": "Q8_K_XL", + "log": "results/Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL-00001-of-00002", "model_clean": "Qwen3-Next-80B-A3B-Instruct-UD-Q8_K_XL", @@ -9825,6 +12295,230 @@ "number": "7670" } }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 323.22, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.24, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 231.95, + "tps_std": 3.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 324.04, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 14.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 230.54, + "tps_std": 3.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-12b-it-UD-Q8_K_XL", + "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 11.66, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 11.77, + "file_size_gib": 13.4, + "name_params_b": 11.77, + "quant": "Q8_K_XL", + "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "gemma-3-12b-it-UD-Q8_K_XL", "model_clean": "gemma-3-12b-it-UD-Q8_K_XL", @@ -10721,6 +13415,230 @@ "number": "7670" } }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 430.73, + "tps_std": 1.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.86, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 193.46, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.69, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 525.55, + "tps_std": 1.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 4.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 202.49, + "tps_std": 3.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-27b-it-BF16-00001-of-00002", + "model_clean": "gemma-3-27b-it-BF16", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.72, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 27.01, + "file_size_gib": 50.31, + "name_params_b": 27.01, + "quant": "BF16", + "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "gemma-3-27b-it-BF16-00001-of-00002", "model_clean": "gemma-3-27b-it-BF16", @@ -11642,6 +14560,230 @@ "number": "7670" } }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2858.32, + "tps_std": 17.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 84.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1182.57, + "tps_std": 31.53, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 61.59, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2679.11, + "tps_std": 228.92, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 70.08, + "tps_std": 4.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 1138.9, + "tps_std": 19.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gemma-3-4b-it-Q3_K_S", + "model_clean": "gemma-3-4b-it-Q3_K_S", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 61.59, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 3.88, + "file_size_gib": 1.8, + "name_params_b": 3.88, + "quant": "Q3_K_S", + "log": "results/gemma-3-4b-it-Q3_K_S__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "gemma-3-4b-it-Q3_K_S", "model_clean": "gemma-3-4b-it-Q3_K_S", @@ -12538,6 +15680,230 @@ "number": "7670" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 181.34, + "tps_std": 1.51, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.25, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 217.76, + "tps_std": 0.23, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.6, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 181.33, + "tps_std": 1.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 52.27, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 215.89, + "tps_std": 4.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.64, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -13434,6 +16800,230 @@ "number": "7670" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 548.07, + "tps_std": 6.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.66, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 337.62, + "tps_std": 3.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 546.89, + "tps_std": 7.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 73.57, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 337.07, + "tps_std": 4.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 52.54, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -14330,6 +17920,230 @@ "number": "7670" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 549.23, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.62, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 139.39, + "tps_std": 0.75, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2", + "env_base": "rocm", + "env_variant": "7.2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.59, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 549.22, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.67, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 137.3, + "tps_std": 1.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7.2-hblt0", + "env_base": "rocm", + "env_variant": "7.2-hblt0", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": null, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7.2__hblt0__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "a14b960bc", + "number": "7816" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0",