diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log similarity index 86% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log rename to benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log index 2e9926a..486713e 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 78.36 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 72.06 ± 0.03 | | llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.99 ± 0.00 | -build: 3f8752b55 (8743) +build: 7957de9dc (8645) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log index d662fe6..795fa30 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.77 ± 0.09 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.06 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.76 ± 0.05 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log index 6255b14..93d71b6 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 70.42 ± 0.04 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.96 ± 0.00 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 77.83 ± 0.10 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.95 ± 0.00 | -build: 7957de9dc (8645) +build: f53577432 (8942) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log index 488e9b3..716d737 100644 --- a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.67 ± 0.05 | -| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.08 ± 0.01 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 17.10 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.06 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log similarity index 80% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log index 867aa2a..e284659 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.52 ± 2.05 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.37 ± 0.02 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.45 ± 2.11 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.04 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log index 41125bf..d97ca66 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.98 ± 0.28 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.00 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.65 ± 0.31 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.25 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..d1f71ff --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log @@ -0,0 +1,2 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log index 67fbf08..ed83496 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 88.16 ± 0.08 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.07 ± 0.20 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.91 ± 0.20 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 14.84 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 81% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log index 1f41d31..0252058 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 947.24 ± 201.85 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.23 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 1058.10 ± 2.19 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.18 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index 717cb8c..6a9e2f9 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.69 ± 0.58 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.63 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.21 ± 1.08 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.62 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log rename to benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log index bf4287e..79c3029 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 914.78 ± 3.38 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.73 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 889.16 ± 36.98 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 32.61 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log index 20f5d80..b24dbc6 100644 --- a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.49 ± 0.75 | -| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.47 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.01 ± 1.51 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.38 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log similarity index 79% rename from benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log index f015431..6151c79 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 358.12 ± 0.68 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 23.00 ± 0.11 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 356.93 ± 1.86 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.97 ± 0.13 | build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 94% rename from benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log index d31910f..73cec88 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.57 ± 0.16 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.16 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 65.86 ± 0.58 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.10 ± 0.04 | build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log similarity index 76% rename from benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log rename to benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log index 3e48901..e2db156 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 238.25 ± 1.14 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.98 ± 0.12 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 236.39 ± 1.24 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.64 ± 0.01 | -build: ff5ef8278 (8763) +build: f53577432 (8942) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log similarity index 91% rename from benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log index 9f15e49..52c1dcd 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.19 ± 1.14 | -| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.16 ± 0.02 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 64.53 ± 0.53 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.04 ± 0.17 | -build: ff5ef8278 (8763) +build: f53577432 (8942) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log deleted file mode 100644 index e4c9a5f..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 376.88 ± 1.23 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 24.00 ± 0.10 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log deleted file mode 100644 index 59b2482..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.91 ± 0.23 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.21 ± 0.26 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log deleted file mode 100644 index f95fa32..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.64 ± 1.36 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 24.09 ± 0.06 | - -build: ff5ef8278 (8763) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index fed4fc6..0000000 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 66.16 ± 0.41 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.33 ± 0.06 | - -build: ff5ef8278 (8763) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log similarity index 90% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log index 2657c15..d1d295f 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 731.49 ± 3.16 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 745.50 ± 9.08 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 97% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log index ac43c79..5c893b9 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.94 ± 0.77 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.19 ± 1.96 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log similarity index 86% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log index 8317da0..d702654 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 747.81 ± 7.01 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 732.09 ± 2.49 | | mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log index 783e117..2bb5fa9 100644 --- a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.77 ± 1.55 | -| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 182.01 ± 2.08 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.07 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log similarity index 75% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log index 84442e0..bf06839 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 264.16 ± 1.43 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.04 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 333.46 ± 1.20 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 15.99 ± 0.04 | -build: 3f8752b55 (8743) +build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 96% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log index 8ca4410..c4e622b 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.76 ± 0.39 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 307.47 ± 0.55 | | nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.60 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log similarity index 84% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log index 6b25238..44934bf 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 334.32 ± 1.13 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.03 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 267.67 ± 1.70 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.18 ± 0.07 | -build: 7957de9dc (8645) +build: f53577432 (8942) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log similarity index 90% rename from benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log index 3ba1084..b1d3089 100644 --- a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 299.29 ± 0.40 | -| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.59 ± 0.01 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 304.86 ± 0.24 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.79 ± 0.04 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log similarity index 81% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log index e86faa7..2296b6a 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1504.86 ± 5.67 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.86 ± 0.21 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1498.50 ± 6.83 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 68.29 ± 0.67 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log index 1470a47..608ce77 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.08 ± 0.06 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.54 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 213.71 ± 0.11 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.47 ± 0.03 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log index 9f9df90..7f4faac 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1148.88 ± 7.32 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.80 ± 0.73 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1149.95 ± 8.12 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.57 ± 0.10 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log index 569cf72..ac5a4b3 100644 --- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.78 ± 0.05 | -| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.46 ± 0.02 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 194.74 ± 0.07 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.36 ± 0.13 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log similarity index 81% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log index e371ccd..5b9a82c 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 404.64 ± 1.49 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.61 ± 0.03 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 418.75 ± 3.97 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.62 ± 0.03 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 94% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log index 2d39364..c456266 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 251.55 ± 9.38 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.51 ± 0.01 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 207.29 ± 0.79 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 2.04 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log similarity index 76% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log index c7315fb..b9f6f0d 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 312.22 ± 2.95 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.60 ± 0.06 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 314.59 ± 2.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.96 ± 0.15 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log similarity index 91% rename from benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log index b3d3697..2143a5b 100644 --- a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 250.19 ± 0.15 | -| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.88 ± 1.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 235.56 ± 9.94 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.82 ± 0.08 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 75aff14..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 383.01 ± 0.52 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 22.11 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log deleted file mode 100644 index 7489d62..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1380.26 ± 14.08 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.71 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log deleted file mode 100644 index 6459dce..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1098.31 ± 7.28 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.89 ± 0.32 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 1de1e9d..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1080.97 ± 5.37 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 45.98 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log deleted file mode 100644 index 730ba29..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1111.77 ± 11.14 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.94 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log deleted file mode 100644 index 27e4c62..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 563.74 ± 0.42 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.46 ± 0.01 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log deleted file mode 100644 index 4423708..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 703.33 ± 5.75 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 29.06 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index be600e8..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 489.36 ± 3.31 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.31 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log deleted file mode 100644 index 3d6dabe..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 698.12 ± 4.53 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 25.88 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log deleted file mode 100644 index af70e44..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 615.35 ± 3.03 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.34 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log deleted file mode 100644 index 5a8312d..0000000 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 553.41 ± 1.44 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.57 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log similarity index 80% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log index c171b99..ffdd0be 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 511.62 ± 3.03 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.68 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 513.08 ± 3.80 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.46 ± 0.06 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log index 599c971..1800190 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 381.40 ± 0.84 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.86 ± 3.06 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 379.44 ± 1.48 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.65 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log index 95db09a..f836f61 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 512.14 ± 2.33 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.77 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.14 ± 3.56 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log index c3274c3..d1591bf 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 390.80 ± 4.30 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.61 ± 0.00 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 391.47 ± 0.29 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.88 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log similarity index 77% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log index f236f09..7f7683b 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 509.12 ± 4.29 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.63 ± 0.12 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 538.42 ± 12.90 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.91 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log similarity index 92% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log index 5911d61..1393959 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 354.63 ± 1.42 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.65 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 412.11 ± 0.60 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 21.59 ± 0.09 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..f3a2f0e --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log @@ -0,0 +1,6 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +main: error: failed to load model '/home/kyuz0/models/qwen-3.6-35b-a3b/BF16/Qwen3.6-35B-A3B-BF16-00001-of-00002.gguf' +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +✖ ! [rocm7-nightlies] Qwen3.6-35B-A3B-BF16-00001-of-00002__fa1 failed (exit 0) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..c843485 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,6 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +main: error: failed to load model '/home/kyuz0/models/qwen-3.6-35b-a3b/BF16/Qwen3.6-35B-A3B-BF16-00001-of-00002.gguf' +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +✖ ! [rocm7-nightlies] Qwen3.6-35B-A3B-BF16-00001-of-00002__fa1 __longctx32768 failed (exit 0) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log similarity index 79% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log index fbf9df3..6a5a268 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.66 ± 0.28 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.56 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 122.43 ± 0.23 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 11.55 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log similarity index 87% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log index 3ce022f..028a15c 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 97.36 ± 0.07 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 98.18 ± 0.09 | | qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.94 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log similarity index 87% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log index bb65d81..9cb2ce7 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 317.93 ± 2.34 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 316.59 ± 1.87 | | qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 10.80 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log similarity index 79% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log index fc29ee0..b336a9a 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 258.29 ± 0.54 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.39 ± 0.01 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 267.07 ± 0.91 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.37 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log new file mode 100644 index 0000000..d480c06 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1359.62 ± 5.74 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.34 ± 0.43 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 78% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index 81afdf3..fc7af7d 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 680.80 ± 1.05 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.13 ± 0.14 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 670.26 ± 1.77 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.42 ± 0.04 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..195ecab --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1089.21 ± 5.82 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 49.27 ± 0.08 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log similarity index 75% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log index 73fe55f..5ced017 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 681.43 ± 0.74 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.19 ± 0.03 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 703.58 ± 0.51 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.08 ± 0.33 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log new file mode 100644 index 0000000..0b2707e --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1067.33 ± 6.85 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 48.23 ± 0.11 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log similarity index 75% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log index 5a170f1..e6f9da3 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 611.53 ± 0.52 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.17 ± 0.02 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 769.99 ± 2.79 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.91 ± 0.49 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..083496a --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1091.62 ± 10.45 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 49.10 ± 0.15 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log similarity index 63% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log index a8b07e2..382213d 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -1,8 +1,8 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 669.42 ± 1.32 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 41.96 ± 0.03 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 664.89 ± 1.48 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 42.73 ± 0.65 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log similarity index 62% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log index 56a7d97..ad0830a 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 663.48 ± 2.94 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.17 ± 0.04 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 663.94 ± 2.80 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 57.13 ± 0.04 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 61% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log index ea9d147..beda67a 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 289.32 ± 1.23 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 44.23 ± 0.08 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 292.49 ± 0.18 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 43.97 ± 0.06 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log similarity index 61% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log index f00163a..fea8098 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 673.76 ± 0.15 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 49.03 ± 0.14 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1045.16 ± 5.79 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 58.82 ± 0.08 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 61% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log index 38c9115..e344a93 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 1016.96 ± 39.65 | -| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 59.11 ± 0.06 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 682.80 ± 1.45 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 48.77 ± 0.07 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 63% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log index 3dae2a9..42341b2 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 739.92 ± 29.08 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 29.61 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1222.77 ± 2.88 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 43.72 ± 0.08 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..d92138b --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 653.49 ± 0.18 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.21 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log similarity index 60% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log index 4db4f18..06efdb6 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 697.35 ± 3.46 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 28.10 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1043.12 ± 46.10 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 44.53 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..14a3929 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 544.04 ± 1.79 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.70 ± 0.21 | + +build: f53577432 (8942) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log similarity index 60% rename from benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log index ac65f9e..1c3f7f0 100644 --- a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 404.18 ± 1.94 | -| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 20.88 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1049.22 ± 7.25 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 43.10 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..a75e2df --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 758.76 ± 0.99 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.70 ± 0.01 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..637ab39 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1036.89 ± 9.24 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 44.20 ± 0.00 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..104e4f9 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 64055 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 64055 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 510.38 ± 2.64 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 38.18 ± 1.71 | + +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log similarity index 62% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log index 6ee1f2c..1ed9fe0 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 415.49 ± 2.30 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 30.74 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 576.25 ± 2.16 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 45.25 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log similarity index 62% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log index 6b9aafa..e078197 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 228.90 ± 0.28 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.62 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 273.77 ± 0.30 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 36.71 ± 0.08 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log similarity index 61% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log index 077f6d0..78c10fa 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 710.93 ± 18.62 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 28.92 ± 0.01 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp512 | 893.79 ± 4.74 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg128 | 46.05 ± 0.07 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log similarity index 61% rename from benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log rename to benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log index 8f81e29..a6902ba 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 531.15 ± 0.20 | -| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 26.32 ± 0.03 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 610.84 ± 1.99 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 39.67 ± 0.15 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log deleted file mode 100644 index 4901fb5..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 864.78 ± 7.29 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.66 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index 5e08a1b..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 625.12 ± 11.57 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.99 ± 0.23 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log similarity index 89% rename from benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log index 9172211..ad4fb02 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 825.29 ± 28.88 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.95 ± 0.00 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 836.74 ± 6.15 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.95 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log index f533e1b..2cd4471 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.64 ± 9.10 | -| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.37 ± 0.00 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.85 ± 6.10 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.38 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..ba410bb --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 823.65 ± 83.54 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.67 ± 0.00 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..27ef16c --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 628.14 ± 5.88 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.15 ± 0.01 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log deleted file mode 100644 index e9d1e8e..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1298.78 ± 7.07 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.65 ± 0.23 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index fb7c672..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 691.44 ± 4.50 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.86 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 80% rename from benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log index eccdf91..3363194 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1615.54 ± 7.79 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.95 ± 0.08 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1608.01 ± 5.26 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 47.13 ± 0.29 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 87% rename from benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index e0c3111..ba51449 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 692.91 ± 9.60 | -| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.13 ± 0.02 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 689.30 ± 12.21 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.14 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..e24d90b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1299.90 ± 11.12 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.59 ± 0.02 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..e2f02bd --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 693.48 ± 4.74 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 36.76 ± 0.16 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log deleted file mode 100644 index d2dfb99..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1226.22 ± 161.95 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.27 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index a49c884..0000000 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 667.32 ± 5.65 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.43 ± 0.01 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 90% rename from benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log index b78ff15..48ba107 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1510.77 ± 39.47 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1524.44 ± 7.11 | | gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.68 ± 0.02 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index fd6132f..48432ce 100644 --- a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 683.73 ± 6.54 | -| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.01 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 689.36 ± 8.08 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.67 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log similarity index 60% rename from benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log rename to benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log index ca09bdd..64550ee 100644 --- a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 545.20 ± 3.50 | -| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 21.99 ± 0.05 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1272.69 ± 74.50 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.31 ± 0.01 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..9ed35f3 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 696.25 ± 7.26 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 32.95 ± 0.77 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log deleted file mode 100644 index a323fa7..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 374.46 ± 0.56 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.40 ± 0.00 | - -build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index 46917bc..0000000 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 151.84 ± 1.65 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.02 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log similarity index 86% rename from benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log rename to benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log index 84cd32e..7870940 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 381.31 ± 4.26 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 380.58 ± 1.49 | | gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.49 ± 0.00 | -build: 3f8752b55 (8743) +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log index 93402ed..9ff1bae 100644 --- a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.71 ± 1.55 | -| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.00 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 150.63 ± 1.53 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.03 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..64f36dd --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 377.93 ± 2.48 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.44 ± 0.00 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..d58c607 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.11 ± 1.83 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.18 ± 0.01 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log deleted file mode 100644 index 89cb0ee..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 313.26 ± 1.19 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log deleted file mode 100644 index 63b8782..0000000 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ /dev/null @@ -1,8 +0,0 @@ -ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): - Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB -| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | -| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 141.61 ± 1.71 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | - -build: 3f8752b55 (8743) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 82% rename from benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log index 528dac0..41a06c4 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 293.56 ± 0.37 | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 293.27 ± 0.37 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.50 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 97% rename from benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index 2d739fe..3338698 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.26 ± 1.06 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.84 ± 1.27 | | gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log new file mode 100644 index 0000000..2c3483f --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 310.54 ± 0.73 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.52 ± 0.00 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log new file mode 100644 index 0000000..79d530a --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.71 ± 1.29 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.01 | + +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log similarity index 90% rename from benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log index c3adea8..704cc50 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 282.75 ± 0.35 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 282.42 ± 0.40 | | gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 97% rename from benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log index c197781..1835f21 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.30 ± 1.20 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.60 ± 1.37 | | gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log similarity index 68% rename from benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log rename to benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log index 93e42e7..b397b89 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 309.13 ± 0.94 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 308.17 ± 0.62 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log similarity index 77% rename from benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log index 41d2e44..fbdfc88 100644 --- a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 140.72 ± 1.33 | -| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 138.85 ± 1.58 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log similarity index 80% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log index 6bbf5e1..00caf55 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 954.68 ± 36.82 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.02 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 945.82 ± 60.17 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.25 ± 0.01 | build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 92% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log index 3898436..fcc4071 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 296.45 ± 1.01 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.19 ± 8.13 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 311.38 ± 1.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 31.94 ± 7.16 | -build: 3f8752b55 (8743) +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log index 3be07b2..6ee9804 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 633.00 ± 7.37 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 51.15 ± 0.36 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 634.64 ± 3.80 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.70 ± 0.04 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log similarity index 83% rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log index 376176f..5b9fb05 100644 --- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 313.40 ± 10.22 | -| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.98 ± 0.01 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 304.96 ± 1.71 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.84 ± 0.12 | -build: 7957de9dc (8645) +build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log similarity index 80% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log index 5c9869d..0593f0d 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 2028.68 ± 6.34 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.58 ± 0.09 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 2029.48 ± 6.68 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.53 ± 0.11 | build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log index 3ba6ef6..5cf4d57 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 496.41 ± 2.25 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.87 ± 0.02 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 494.19 ± 1.26 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.81 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log similarity index 77% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log index efb2f9d..da0f1ce 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1631.41 ± 13.30 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.63 ± 0.10 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1631.69 ± 17.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.53 ± 0.10 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log index 42c242a..0c002a7 100644 --- a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 489.21 ± 2.12 | -| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.87 ± 0.03 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 490.56 ± 2.76 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.69 ± 0.16 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log similarity index 77% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log index fbf458f..e97dc44 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1541.76 ± 1.17 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.64 ± 0.16 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1512.85 ± 4.84 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.54 ± 0.15 | -build: 3f8752b55 (8743) +build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log similarity index 95% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log index 5fcb0c3..8f01f6a 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.68 ± 0.77 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.64 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 163.39 ± 1.12 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log similarity index 86% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log index c1c7e95..ef4d459 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1519.53 ± 5.33 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.57 ± 0.15 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1542.98 ± 7.94 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.52 ± 0.12 | -build: 7957de9dc (8645) +build: f53577432 (8942) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log similarity index 92% rename from benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log rename to benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log index a4ec636..3bbc460 100644 --- a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.73 ± 0.61 | -| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.65 ± 0.00 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 162.64 ± 0.31 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.64 ± 0.00 | -build: 3f8752b55 (8743) +build: f53577432 (8942) diff --git a/docs/results.json b/docs/results.json index 2575442..214ddb4 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-04-15T15:54:23Z", + "generated_at": "2026-04-29T05:45:18Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", @@ -17,14 +17,14 @@ "number": "8645" }, { - "hash": "ff5ef8278", - "number": "8763" + "hash": "f53577432", + "number": "8942" } ], "environments": [ "rocm-7_2", - "rocm-7_2_1", - "rocm-7_2_1-pr21344", + "rocm-7_2_2", + "rocm-7_2_2-pr21344", "rocm6_4_4", "rocm7-nightlies", "vulkan_amdvlk", @@ -36,15 +36,15 @@ { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 70.42, - "tps_std": 0.04, + "tps_mean": 72.06, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -54,7 +54,7 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -64,121 +64,9 @@ { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 2.96, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 16.77, - "tps_std": 0.09, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 2.06, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 78.36, - "tps_std": 0.04, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 125.03, - "file_size_gib": 70.31, - "name_params_b": 125.03, - "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", - "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, @@ -194,24 +82,24 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 17.67, + "tps_mean": 16.76, "tps_std": 0.05, "error": false, "error_type": null, @@ -222,19 +110,19 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -250,11 +138,123 @@ "file_size_gib": 70.31, "name_params_b": 125.03, "quant": "Q4_K_XL", - "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 77.83, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 17.1, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, { @@ -646,15 +646,15 @@ { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 403.52, - "tps_std": 2.05, + "tps_mean": 403.45, + "tps_std": 2.11, "error": false, "error_type": null, "backend": "ROCm", @@ -664,7 +664,7 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -674,126 +674,14 @@ { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 21.37, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 87.98, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.34, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 404.18, - "tps_std": 1.94, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 29.94, - "file_size_gib": 55.79, - "name_params_b": 29.94, - "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "GLM-4.7-Flash-BF16-00001-of-00002", - "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 20.88, + "tps_mean": 21.04, "tps_std": 0.01, "error": false, "error_type": null, @@ -804,25 +692,25 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 88.16, - "tps_std": 0.08, + "tps_mean": 87.65, + "tps_std": 0.31, "error": false, "error_type": null, "backend": "ROCm", @@ -832,24 +720,77 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 15.07, + "tps_mean": 15.25, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": false, + "error_type": null, + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 87.91, "tps_std": 0.2, "error": false, "error_type": null, @@ -860,11 +801,39 @@ "file_size_gib": 55.79, "name_params_b": 29.94, "quant": "BF16", - "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.84, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, { @@ -1343,15 +1312,15 @@ { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 947.24, - "tps_std": 201.85, + "tps_mean": 1058.1, + "tps_std": 2.19, "error": false, "error_type": null, "backend": "ROCm", @@ -1361,7 +1330,7 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -1371,14 +1340,14 @@ { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 33.23, + "tps_mean": 33.18, "tps_std": 0.0, "error": false, "error_type": null, @@ -1389,7 +1358,7 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -1399,15 +1368,15 @@ { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 91.69, - "tps_std": 0.58, + "tps_mean": 91.21, + "tps_std": 1.08, "error": false, "error_type": null, "backend": "ROCm", @@ -1417,7 +1386,7 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -1427,15 +1396,15 @@ { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.63, - "tps_std": 0.0, + "tps_mean": 20.62, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -1445,7 +1414,7 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -1455,15 +1424,15 @@ { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 914.78, - "tps_std": 3.38, + "tps_mean": 889.16, + "tps_std": 36.98, "error": false, "error_type": null, "backend": "ROCm", @@ -1473,24 +1442,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 32.73, + "tps_mean": 32.61, "tps_std": 0.0, "error": false, "error_type": null, @@ -1501,25 +1470,25 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 91.49, - "tps_std": 0.75, + "tps_mean": 91.01, + "tps_std": 1.51, "error": false, "error_type": null, "backend": "ROCm", @@ -1529,24 +1498,24 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.47, + "tps_mean": 20.38, "tps_std": 0.0, "error": false, "error_type": null, @@ -1557,11 +1526,11 @@ "file_size_gib": 32.7, "name_params_b": 29.94, "quant": "Q8_K_XL", - "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -1984,15 +1953,15 @@ { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 358.12, - "tps_std": 0.68, + "tps_mean": 356.93, + "tps_std": 1.86, "error": false, "error_type": null, "backend": "ROCm", @@ -2002,7 +1971,7 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -2012,15 +1981,15 @@ { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.0, - "tps_std": 0.11, + "tps_mean": 22.97, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -2030,7 +1999,7 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -2040,15 +2009,15 @@ { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 63.57, - "tps_std": 0.16, + "tps_mean": 65.86, + "tps_std": 0.58, "error": false, "error_type": null, "backend": "ROCm", @@ -2058,7 +2027,7 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -2068,14 +2037,70 @@ { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.16, + "tps_mean": 6.1, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 236.39, + "tps_std": 1.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.64, "tps_std": 0.01, "error": false, "error_type": null, @@ -2086,81 +2111,25 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "f53577432", + "number": "8942" } }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 238.25, - "tps_std": 1.14, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 22.98, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 87.2, - "name_params_b": 228.69, - "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", - "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 63.19, - "tps_std": 1.14, + "tps_mean": 64.53, + "tps_std": 0.53, "error": false, "error_type": null, "backend": "ROCm", @@ -2170,25 +2139,25 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "ff5ef8278", - "number": "8763" + "hash": "f53577432", + "number": "8942" } }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.16, - "tps_std": 0.02, + "tps_mean": 6.04, + "tps_std": 0.17, "error": false, "error_type": null, "backend": "ROCm", @@ -2198,11 +2167,11 @@ "file_size_gib": 87.2, "name_params_b": 228.69, "quant": "Q3_K_S", - "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "ff5ef8278", - "number": "8763" + "hash": "f53577432", + "number": "8942" } }, { @@ -2653,230 +2622,6 @@ "number": "8743" } }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 376.88, - "tps_std": 1.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.0, - "tps_std": 0.1, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 63.91, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.21, - "tps_std": 0.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 245.64, - "tps_std": 1.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 24.09, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 66.16, - "tps_std": 0.41, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, - { - "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", - "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 6.33, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 228.69, - "file_size_gib": 94.93, - "name_params_b": 228.69, - "quant": "Q3_K_XL", - "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "ff5ef8278", - "number": "8763" - } - }, { "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", @@ -3328,15 +3073,15 @@ { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 731.49, - "tps_std": 3.16, + "tps_mean": 745.5, + "tps_std": 9.08, "error": false, "error_type": null, "backend": "ROCm", @@ -3346,7 +3091,7 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3356,9 +3101,9 @@ { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, @@ -3374,7 +3119,7 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3384,15 +3129,15 @@ { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 164.94, - "tps_std": 0.77, + "tps_mean": 164.19, + "tps_std": 1.96, "error": false, "error_type": null, "backend": "ROCm", @@ -3402,7 +3147,7 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3412,9 +3157,9 @@ { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -3430,7 +3175,7 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3440,15 +3185,15 @@ { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 747.81, - "tps_std": 7.01, + "tps_mean": 732.09, + "tps_std": 2.49, "error": false, "error_type": null, "backend": "ROCm", @@ -3458,19 +3203,19 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, @@ -3486,25 +3231,25 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 182.77, - "tps_std": 1.55, + "tps_mean": 182.01, + "tps_std": 2.08, "error": false, "error_type": null, "backend": "ROCm", @@ -3514,24 +3259,24 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 7.08, + "tps_mean": 7.07, "tps_std": 0.0, "error": false, "error_type": null, @@ -3542,11 +3287,11 @@ "file_size_gib": 25.16, "name_params_b": 13.51, "quant": "BF16", - "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -3938,15 +3683,15 @@ { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 334.32, - "tps_std": 1.13, + "tps_mean": 333.46, + "tps_std": 1.2, "error": false, "error_type": null, "backend": "ROCm", @@ -3956,7 +3701,7 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3966,15 +3711,15 @@ { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.01, - "tps_std": 0.03, + "tps_mean": 15.99, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", @@ -3984,7 +3729,7 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -3994,15 +3739,15 @@ { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 308.76, - "tps_std": 0.39, + "tps_mean": 307.47, + "tps_std": 0.55, "error": false, "error_type": null, "backend": "ROCm", @@ -4012,7 +3757,7 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4022,9 +3767,9 @@ { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -4040,7 +3785,7 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4050,15 +3795,15 @@ { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 264.16, - "tps_std": 1.43, + "tps_mean": 267.67, + "tps_std": 1.7, "error": false, "error_type": null, "backend": "ROCm", @@ -4068,24 +3813,80 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 16.01, + "tps_mean": 16.18, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 304.86, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.79, "tps_std": 0.04, "error": false, "error_type": null, @@ -4096,67 +3897,11 @@ "file_size_gib": 78.02, "name_params_b": 120.67, "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 299.29, - "tps_std": 0.4, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", - "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.59, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 120.67, - "file_size_gib": 78.02, - "name_params_b": 120.67, - "quant": "Q4_K_XL", - "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -4610,15 +4355,15 @@ { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1504.86, - "tps_std": 5.67, + "tps_mean": 1498.5, + "tps_std": 6.83, "error": false, "error_type": null, "backend": "ROCm", @@ -4628,7 +4373,7 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4638,15 +4383,15 @@ { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 67.86, - "tps_std": 0.21, + "tps_mean": 68.29, + "tps_std": 0.67, "error": false, "error_type": null, "backend": "ROCm", @@ -4656,7 +4401,7 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4666,15 +4411,15 @@ { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 214.08, - "tps_std": 0.06, + "tps_mean": 213.71, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", @@ -4684,7 +4429,7 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4694,15 +4439,15 @@ { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.54, - "tps_std": 0.02, + "tps_mean": 33.47, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -4712,7 +4457,7 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -4722,15 +4467,15 @@ { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1148.88, - "tps_std": 7.32, + "tps_mean": 1149.95, + "tps_std": 8.12, "error": false, "error_type": null, "backend": "ROCm", @@ -4740,25 +4485,25 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 67.8, - "tps_std": 0.73, + "tps_mean": 67.57, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -4768,25 +4513,25 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 194.78, - "tps_std": 0.05, + "tps_mean": 194.74, + "tps_std": 0.07, "error": false, "error_type": null, "backend": "ROCm", @@ -4796,25 +4541,25 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.46, - "tps_std": 0.02, + "tps_mean": 33.36, + "tps_std": 0.13, "error": false, "error_type": null, "backend": "ROCm", @@ -4824,11 +4569,11 @@ "file_size_gib": 17.35, "name_params_b": 30.53, "quant": "Q4_K_M", - "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -5282,15 +5027,15 @@ { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 404.64, - "tps_std": 1.49, + "tps_mean": 418.75, + "tps_std": 3.97, "error": false, "error_type": null, "backend": "ROCm", @@ -5300,7 +5045,7 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -5310,14 +5055,14 @@ { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 19.61, + "tps_mean": 19.62, "tps_std": 0.03, "error": false, "error_type": null, @@ -5328,7 +5073,7 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -5338,15 +5083,15 @@ { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 251.55, - "tps_std": 9.38, + "tps_mean": 207.29, + "tps_std": 0.79, "error": false, "error_type": null, "backend": "ROCm", @@ -5356,7 +5101,7 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -5366,15 +5111,15 @@ { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 16.51, - "tps_std": 0.01, + "tps_mean": 15.34, + "tps_std": 2.04, "error": false, "error_type": null, "backend": "ROCm", @@ -5384,7 +5129,7 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -5394,15 +5139,15 @@ { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 312.22, - "tps_std": 2.95, + "tps_mean": 314.59, + "tps_std": 2.13, "error": false, "error_type": null, "backend": "ROCm", @@ -5412,52 +5157,24 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 19.6, - "tps_std": 0.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 122.11, - "file_size_gib": 85.6, - "name_params_b": 122.11, - "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", - "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 250.19, + "tps_mean": 19.96, "tps_std": 0.15, "error": false, "error_type": null, @@ -5468,25 +5185,25 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 15.88, - "tps_std": 1.13, + "test": "pp2048 @ d32768", + "tps_mean": 235.56, + "tps_std": 9.94, "error": false, "error_type": null, "backend": "ROCm", @@ -5496,11 +5213,39 @@ "file_size_gib": 85.6, "name_params_b": 122.11, "quant": "Q5_K_XL", - "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.82, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, { @@ -5952,17 +5697,17 @@ } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 511.62, - "tps_std": 3.03, + "tps_mean": 513.08, + "tps_std": 3.8, "error": false, "error_type": null, "backend": "ROCm", @@ -5972,7 +5717,7 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -5980,17 +5725,17 @@ } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.68, - "tps_std": 0.0, + "tps_mean": 23.46, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -6000,7 +5745,7 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6008,17 +5753,17 @@ } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 381.4, - "tps_std": 0.84, + "tps_mean": 379.44, + "tps_std": 1.48, "error": false, "error_type": null, "backend": "ROCm", @@ -6028,7 +5773,7 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6036,235 +5781,11 @@ } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 19.86, - "tps_std": 3.06, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 509.12, - "tps_std": 4.29, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 23.63, - "tps_std": 0.12, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 390.8, - "tps_std": 4.3, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 21.61, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 545.2, - "tps_std": 3.5, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 21.99, - "tps_std": 0.05, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 354.63, - "tps_std": 1.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 64.6, - "name_params_b": 34.66, - "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -6280,25 +5801,25 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 512.14, - "tps_std": 2.33, + "tps_mean": 509.14, + "tps_std": 3.56, "error": false, "error_type": null, "backend": "ROCm", @@ -6308,24 +5829,24 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.77, + "tps_mean": 23.98, "tps_std": 0.0, "error": false, "error_type": null, @@ -6336,25 +5857,25 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 383.01, - "tps_std": 0.52, + "tps_mean": 391.47, + "tps_std": 0.29, "error": false, "error_type": null, "backend": "ROCm", @@ -6364,24 +5885,24 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 22.11, + "tps_mean": 21.88, "tps_std": 0.01, "error": false, "error_type": null, @@ -6392,16 +5913,178 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 538.42, + "tps_std": 12.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 412.11, + "tps_std": 0.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 21.59, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 35.0, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 35.0, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": null + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6409,8 +6092,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 122.66, - "tps_std": 0.28, + "tps_mean": 122.43, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "Vulkan", @@ -6420,16 +6103,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6437,7 +6120,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 11.56, + "tps_mean": 11.55, "tps_std": 0.01, "error": false, "error_type": null, @@ -6448,16 +6131,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6465,8 +6148,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 97.36, - "tps_std": 0.07, + "tps_mean": 98.18, + "tps_std": 0.09, "error": false, "error_type": null, "backend": "Vulkan", @@ -6476,16 +6159,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -6504,16 +6187,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6521,8 +6204,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 317.93, - "tps_std": 2.34, + "tps_mean": 316.59, + "tps_std": 1.87, "error": false, "error_type": null, "backend": "Vulkan", @@ -6532,16 +6215,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6560,16 +6243,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6577,8 +6260,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 258.29, - "tps_std": 0.54, + "tps_mean": 267.07, + "tps_std": 0.91, "error": false, "error_type": null, "backend": "Vulkan", @@ -6588,16 +6271,16 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", - "model_clean": "Qwen3.5-35B-A3B-BF16", + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -6605,7 +6288,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 10.39, + "tps_mean": 10.37, "tps_std": 0.01, "error": false, "error_type": null, @@ -6616,35 +6299,35 @@ "file_size_gib": 64.6, "name_params_b": 34.66, "quant": "BF16", - "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1380.26, - "tps_std": 14.08, + "tps_mean": 1359.62, + "tps_std": 5.74, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6652,27 +6335,27 @@ } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 47.71, - "tps_std": 0.01, + "tps_mean": 48.34, + "tps_std": 0.43, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6680,27 +6363,27 @@ } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 680.8, - "tps_std": 1.05, + "tps_mean": 670.26, + "tps_std": 1.77, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6708,27 +6391,27 @@ } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.13, - "tps_std": 0.14, + "tps_mean": 40.42, + "tps_std": 0.04, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -6736,120 +6419,120 @@ } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1098.31, - "tps_std": 7.28, + "tps_mean": 1089.21, + "tps_std": 5.82, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 47.89, - "tps_std": 0.32, + "tps_mean": 49.27, + "tps_std": 0.08, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 681.43, - "tps_std": 0.74, + "tps_mean": 703.58, + "tps_std": 0.51, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.19, - "tps_std": 0.03, + "tps_mean": 41.08, + "tps_std": 0.33, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -6857,27 +6540,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1080.97, - "tps_std": 5.37, + "tps_mean": 1067.33, + "tps_std": 6.85, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -6885,27 +6568,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 45.98, - "tps_std": 0.02, + "tps_mean": 48.23, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -6913,27 +6596,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 611.53, - "tps_std": 0.52, + "tps_mean": 769.99, + "tps_std": 2.79, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm6_4_4", "env_base": "rocm6_4_4", "env_variant": null, @@ -6941,27 +6624,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 40.17, - "tps_std": 0.02, + "tps_mean": 41.91, + "tps_std": 0.49, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -6969,27 +6652,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1111.77, - "tps_std": 11.14, + "tps_mean": 1091.62, + "tps_std": 10.45, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -6997,27 +6680,27 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 47.94, - "tps_std": 0.0, + "tps_mean": 49.1, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -7025,27 +6708,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 669.42, - "tps_std": 1.32, + "tps_mean": 664.89, + "tps_std": 1.48, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "rocm7-nightlies", "env_base": "rocm7", "env_variant": "nightlies", @@ -7053,27 +6736,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 41.96, - "tps_std": 0.03, + "tps_mean": 42.73, + "tps_std": 0.65, "error": false, "error_type": null, "backend": "ROCm", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -7081,27 +6764,27 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 663.48, - "tps_std": 2.94, + "tps_mean": 663.94, + "tps_std": 2.8, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -7109,7 +6792,7 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 58.17, + "tps_mean": 57.13, "tps_std": 0.04, "error": false, "error_type": null, @@ -7117,19 +6800,19 @@ "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -7137,27 +6820,27 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 289.32, - "tps_std": 1.23, + "tps_mean": 292.49, + "tps_std": 0.18, "error": false, "error_type": null, "backend": "Vulkan", "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "vulkan_amdvlk", "env_base": "vulkan_amdvlk", "env_variant": null, @@ -7165,63 +6848,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 44.23, - "tps_std": 0.08, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1016.96, - "tps_std": 39.65, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 59.11, + "tps_mean": 43.97, "tps_std": 0.06, "error": false, "error_type": null, @@ -7229,19 +6856,75 @@ "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, + "file_size_gib": 20.81, "name_params_b": 34.66, "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1045.16, + "tps_std": 5.79, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 58.82, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "env": "vulkan_radv", "env_base": "vulkan_radv", "env_variant": null, @@ -7249,7 +6932,707 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 673.76, + "tps_mean": 682.8, + "tps_std": 1.45, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 48.77, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1222.77, + "tps_std": 2.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 43.72, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 653.49, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.21, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1043.12, + "tps_std": 46.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.53, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 544.04, + "tps_std": 1.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.7, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1049.22, + "tps_std": 7.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 43.1, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 758.76, + "tps_std": 0.99, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 38.7, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1036.89, + "tps_std": 9.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 44.2, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 510.38, + "tps_std": 2.64, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 38.18, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 576.25, + "tps_std": 2.16, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 45.25, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 273.77, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 36.71, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 893.79, + "tps_std": 4.74, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.05, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 610.84, + "tps_std": 1.99, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 39.67, "tps_std": 0.15, "error": false, "error_type": null, @@ -7257,728 +7640,28 @@ "ngl": 99, "mmap": 0, "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 49.03, - "tps_std": 0.14, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 20.7, - "name_params_b": 34.66, - "quant": "Q4_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 739.92, - "tps_std": 29.08, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, + "file_size_gib": 35.8, "name_params_b": 34.66, "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.61, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 563.74, - "tps_std": 0.42, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.46, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 703.33, - "tps_std": 5.75, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 29.06, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 489.36, - "tps_std": 3.31, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.31, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 698.12, - "tps_std": 4.53, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 25.88, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 615.35, - "tps_std": 3.03, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm6_4_4", - "env_base": "rocm6_4_4", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.34, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 697.35, - "tps_std": 3.46, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.1, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 553.41, - "tps_std": 1.44, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "rocm7-nightlies", - "env_base": "rocm7", - "env_variant": "nightlies", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.57, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 415.49, - "tps_std": 2.3, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 30.74, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 228.9, - "tps_std": 0.28, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_amdvlk", - "env_base": "vulkan_amdvlk", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.62, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 710.93, - "tps_std": 18.62, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 28.92, - "tps_std": 0.01, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 531.15, - "tps_std": 0.2, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", - "env": "vulkan_radv", - "env_base": "vulkan_radv", - "env_variant": null, - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 26.32, - "tps_std": 0.03, - "error": false, - "error_type": null, - "backend": "Vulkan", - "ngl": 99, - "mmap": 0, - "params_b": 34.66, - "file_size_gib": 45.33, - "name_params_b": 34.66, - "quant": "Q8_K_XL", - "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 825.29, - "tps_std": 28.88, + "tps_mean": 836.74, + "tps_std": 6.15, "error": false, "error_type": null, "backend": "ROCm", @@ -7988,7 +7671,7 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -7998,15 +7681,15 @@ { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", "tps_mean": 22.95, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -8016,7 +7699,7 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8026,15 +7709,15 @@ { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 619.64, - "tps_std": 9.1, + "tps_mean": 619.85, + "tps_std": 6.1, "error": false, "error_type": null, "backend": "ROCm", @@ -8044,7 +7727,7 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8054,15 +7737,15 @@ { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 20.37, - "tps_std": 0.0, + "tps_mean": 20.38, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -8072,7 +7755,7 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8082,15 +7765,15 @@ { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 864.78, - "tps_std": 7.29, + "tps_mean": 823.65, + "tps_std": 83.54, "error": false, "error_type": null, "backend": "ROCm", @@ -8100,24 +7783,24 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.66, + "tps_mean": 22.67, "tps_std": 0.0, "error": false, "error_type": null, @@ -8128,25 +7811,25 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 625.12, - "tps_std": 11.57, + "tps_mean": 628.14, + "tps_std": 5.88, "error": false, "error_type": null, "backend": "ROCm", @@ -8156,25 +7839,25 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 19.99, - "tps_std": 0.23, + "tps_mean": 20.15, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -8184,11 +7867,11 @@ "file_size_gib": 47.02, "name_params_b": 25.23, "quant": "BF16", - "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -8642,15 +8325,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1615.54, - "tps_std": 7.79, + "tps_mean": 1608.01, + "tps_std": 5.26, "error": false, "error_type": null, "backend": "ROCm", @@ -8660,7 +8343,7 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8670,15 +8353,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 46.95, - "tps_std": 0.08, + "tps_mean": 47.13, + "tps_std": 0.29, "error": false, "error_type": null, "backend": "ROCm", @@ -8688,7 +8371,7 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8698,15 +8381,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 692.91, - "tps_std": 9.6, + "tps_mean": 689.3, + "tps_std": 12.21, "error": false, "error_type": null, "backend": "ROCm", @@ -8716,7 +8399,7 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -8726,14 +8409,70 @@ { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 37.13, + "tps_mean": 37.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1299.9, + "tps_std": 11.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.59, "tps_std": 0.02, "error": false, "error_type": null, @@ -8744,81 +8483,25 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "7957de9dc", - "number": "8645" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1298.78, - "tps_std": 7.07, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 46.65, - "tps_std": 0.23, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 25.23, - "file_size_gib": 15.9, - "name_params_b": 25.23, - "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 691.44, - "tps_std": 4.5, + "tps_mean": 693.48, + "tps_std": 4.74, "error": false, "error_type": null, "backend": "ROCm", @@ -8828,25 +8511,25 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 36.86, - "tps_std": 0.02, + "tps_mean": 36.76, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -8856,11 +8539,11 @@ "file_size_gib": 15.9, "name_params_b": 25.23, "quant": "Q4_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -9314,15 +8997,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1510.77, - "tps_std": 39.47, + "tps_mean": 1524.44, + "tps_std": 7.11, "error": false, "error_type": null, "backend": "ROCm", @@ -9332,7 +9015,7 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -9342,9 +9025,9 @@ { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, @@ -9360,7 +9043,7 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -9370,15 +9053,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 683.73, - "tps_std": 6.54, + "tps_mean": 689.36, + "tps_std": 8.08, "error": false, "error_type": null, "backend": "ROCm", @@ -9388,7 +9071,7 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -9398,14 +9081,14 @@ { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.71, + "tps_mean": 33.67, "tps_std": 0.01, "error": false, "error_type": null, @@ -9416,7 +9099,7 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -9426,15 +9109,15 @@ { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1226.22, - "tps_std": 161.95, + "tps_mean": 1272.69, + "tps_std": 74.5, "error": false, "error_type": null, "backend": "ROCm", @@ -9444,24 +9127,24 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 41.27, + "tps_mean": 41.31, "tps_std": 0.01, "error": false, "error_type": null, @@ -9472,25 +9155,25 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 667.32, - "tps_std": 5.65, + "tps_mean": 696.25, + "tps_std": 7.26, "error": false, "error_type": null, "backend": "ROCm", @@ -9500,25 +9183,25 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 33.43, - "tps_std": 0.01, + "tps_mean": 32.95, + "tps_std": 0.77, "error": false, "error_type": null, "backend": "ROCm", @@ -9528,11 +9211,11 @@ "file_size_gib": 25.94, "name_params_b": 25.23, "quant": "Q8_K_XL", - "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -9986,15 +9669,15 @@ { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 374.46, - "tps_std": 0.56, + "tps_mean": 380.58, + "tps_std": 1.49, "error": false, "error_type": null, "backend": "ROCm", @@ -10004,7 +9687,7 @@ "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -10014,121 +9697,9 @@ { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 3.4, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 153.71, - "tps_std": 1.55, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 3.17, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 381.31, - "tps_std": 4.26, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 30.7, - "file_size_gib": 57.18, - "name_params_b": 30.7, - "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gemma-4-31B-it-BF16-00001-of-00002", - "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, @@ -10144,25 +9715,25 @@ "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 151.84, - "tps_std": 1.65, + "tps_mean": 150.63, + "tps_std": 1.53, "error": false, "error_type": null, "backend": "ROCm", @@ -10172,25 +9743,25 @@ "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 3.17, - "tps_std": 0.02, + "tps_std": 0.03, "error": false, "error_type": null, "backend": "ROCm", @@ -10200,11 +9771,123 @@ "file_size_gib": 57.18, "name_params_b": 30.7, "quant": "BF16", - "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 377.93, + "tps_std": 2.48, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.44, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 153.11, + "tps_std": 1.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.18, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, { @@ -10596,14 +10279,14 @@ { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 293.56, + "tps_mean": 293.27, "tps_std": 0.37, "error": false, "error_type": null, @@ -10614,7 +10297,7 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -10624,14 +10307,14 @@ { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.51, + "tps_mean": 10.5, "tps_std": 0.0, "error": false, "error_type": null, @@ -10642,7 +10325,7 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -10652,15 +10335,15 @@ { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 136.26, - "tps_std": 1.06, + "tps_mean": 136.84, + "tps_std": 1.27, "error": false, "error_type": null, "backend": "ROCm", @@ -10670,7 +10353,7 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -10680,9 +10363,9 @@ { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -10698,7 +10381,7 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -10708,15 +10391,15 @@ { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 313.26, - "tps_std": 1.19, + "tps_mean": 310.54, + "tps_std": 0.73, "error": false, "error_type": null, "backend": "ROCm", @@ -10726,24 +10409,24 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 10.51, + "tps_mean": 10.52, "tps_std": 0.0, "error": false, "error_type": null, @@ -10754,25 +10437,25 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 141.61, - "tps_std": 1.71, + "tps_mean": 140.71, + "tps_std": 1.29, "error": false, "error_type": null, "backend": "ROCm", @@ -10782,25 +10465,25 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 8.11, - "tps_std": 0.0, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -10810,11 +10493,11 @@ "file_size_gib": 17.46, "name_params_b": 30.7, "quant": "Q4_K_XL", - "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -11237,15 +10920,15 @@ { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 282.75, - "tps_std": 0.35, + "tps_mean": 282.42, + "tps_std": 0.4, "error": false, "error_type": null, "backend": "ROCm", @@ -11255,7 +10938,7 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11265,9 +10948,9 @@ { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, @@ -11283,7 +10966,7 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11293,15 +10976,15 @@ { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 132.3, - "tps_std": 1.2, + "tps_mean": 132.6, + "tps_std": 1.37, "error": false, "error_type": null, "backend": "ROCm", @@ -11311,7 +10994,7 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11321,9 +11004,9 @@ { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -11339,7 +11022,7 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11349,15 +11032,15 @@ { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 309.13, - "tps_std": 0.94, + "tps_mean": 308.17, + "tps_std": 0.62, "error": false, "error_type": null, "backend": "ROCm", @@ -11367,19 +11050,19 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, @@ -11395,25 +11078,25 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 140.72, - "tps_std": 1.33, + "tps_mean": 138.85, + "tps_std": 1.58, "error": false, "error_type": null, "backend": "ROCm", @@ -11423,19 +11106,19 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -11451,11 +11134,11 @@ "file_size_gib": 32.6, "name_params_b": 30.7, "quant": "Q8_K_XL", - "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -11878,15 +11561,15 @@ { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 954.68, - "tps_std": 36.82, + "tps_mean": 945.82, + "tps_std": 60.17, "error": false, "error_type": null, "backend": "ROCm", @@ -11896,7 +11579,7 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11906,70 +11589,14 @@ { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.97, - "tps_std": 0.02, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 313.4, - "tps_std": 10.22, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1-pr21344", - "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 35.98, + "tps_mean": 51.25, "tps_std": 0.01, "error": false, "error_type": null, @@ -11980,7 +11607,7 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -11990,71 +11617,15 @@ { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 633.0, - "tps_std": 7.37, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 51.15, - "tps_std": 0.36, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 116.83, - "file_size_gib": 59.02, - "name_params_b": 116.83, - "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "gpt-oss-120b-mxfp4-00001-of-00003", - "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 296.45, - "tps_std": 1.01, + "tps_mean": 311.38, + "tps_std": 1.36, "error": false, "error_type": null, "backend": "ROCm", @@ -12064,25 +11635,25 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" } }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 31.19, - "tps_std": 8.13, + "tps_mean": 31.94, + "tps_std": 7.16, "error": false, "error_type": null, "backend": "ROCm", @@ -12092,11 +11663,123 @@ "file_size_gib": 59.02, "name_params_b": 116.83, "quant": "MXFP4", - "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 634.64, + "tps_std": 3.8, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.7, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 304.96, + "tps_std": 1.71, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.84, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, { @@ -12550,15 +12233,15 @@ { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 2028.68, - "tps_std": 6.34, + "tps_mean": 2029.48, + "tps_std": 6.68, "error": false, "error_type": null, "backend": "ROCm", @@ -12568,7 +12251,7 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -12578,15 +12261,15 @@ { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 72.58, - "tps_std": 0.09, + "tps_mean": 72.53, + "tps_std": 0.11, "error": false, "error_type": null, "backend": "ROCm", @@ -12596,7 +12279,7 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -12606,15 +12289,15 @@ { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 496.41, - "tps_std": 2.25, + "tps_mean": 494.19, + "tps_std": 1.26, "error": false, "error_type": null, "backend": "ROCm", @@ -12624,7 +12307,7 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -12634,15 +12317,15 @@ { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 51.87, - "tps_std": 0.02, + "tps_mean": 51.81, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "ROCm", @@ -12652,7 +12335,7 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -12662,15 +12345,15 @@ { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1631.41, - "tps_std": 13.3, + "tps_mean": 1631.69, + "tps_std": 17.03, "error": false, "error_type": null, "backend": "ROCm", @@ -12680,24 +12363,24 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 72.63, + "tps_mean": 72.53, "tps_std": 0.1, "error": false, "error_type": null, @@ -12708,25 +12391,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 489.21, - "tps_std": 2.12, + "tps_mean": 490.56, + "tps_std": 2.76, "error": false, "error_type": null, "backend": "ROCm", @@ -12736,25 +12419,25 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", - "env": "rocm-7_2_1", + "env": "rocm-7_2_2", "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 51.87, - "tps_std": 0.03, + "tps_mean": 51.69, + "tps_std": 0.16, "error": false, "error_type": null, "backend": "ROCm", @@ -12764,11 +12447,11 @@ "file_size_gib": 11.27, "name_params_b": 20.91, "quant": "MXFP4", - "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "f53577432", + "number": "8942" } }, { @@ -13222,15 +12905,15 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 1519.53, - "tps_std": 5.33, + "tps_mean": 1512.85, + "tps_std": 4.84, "error": false, "error_type": null, "backend": "ROCm", @@ -13240,7 +12923,7 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -13250,14 +12933,14 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 50.57, + "tps_mean": 50.54, "tps_std": 0.15, "error": false, "error_type": null, @@ -13268,7 +12951,7 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -13278,15 +12961,15 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 164.68, - "tps_std": 0.77, + "tps_mean": 163.39, + "tps_std": 1.12, "error": false, "error_type": null, "backend": "ROCm", @@ -13296,7 +12979,7 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { "hash": "7957de9dc", @@ -13306,121 +12989,9 @@ { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1-pr21344", + "env": "rocm-7_2_2-pr21344", "env_base": "rocm", - "env_variant": "7_2_1-pr21344", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "tg32 @ d32768", - "tps_mean": 5.64, - "tps_std": 0.0, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "7957de9dc", - "number": "8645" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "pp512", - "tps_mean": 1541.76, - "tps_std": 1.17, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "default", - "context_tokens": null, - "test": "tg128", - "tps_mean": 50.64, - "tps_std": 0.16, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", - "fa": true, - "context": "longctx32768", - "context_tokens": 32768, - "test": "pp2048 @ d32768", - "tps_mean": 162.73, - "tps_std": 0.61, - "error": false, - "error_type": null, - "backend": "ROCm", - "ngl": 99, - "mmap": 0, - "params_b": 6.74, - "file_size_gib": 3.56, - "name_params_b": 6.74, - "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log", - "rpc": false, - "build": { - "hash": "3f8752b55", - "number": "8743" - } - }, - { - "model": "llama-2-7b.Q4_0", - "model_clean": "llama-2-7b.Q4_0", - "env": "rocm-7_2_1", - "env_base": "rocm", - "env_variant": "7_2_1", + "env_variant": "7_2_2-pr21344", "fa": true, "context": "longctx32768", "context_tokens": 32768, @@ -13436,11 +13007,123 @@ "file_size_gib": 3.56, "name_params_b": 6.74, "quant": "Q4_0", - "log": "results/llama-2-7b.Q4_0__rocm-7_2_1__fa1__longctx32768.log", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx32768.log", "rpc": false, "build": { - "hash": "3f8752b55", - "number": "8743" + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1542.98, + "tps_std": 7.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.52, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 162.64, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "f53577432", + "number": "8942" } }, {