diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..6255b14 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | pp512 | 70.42 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 1 | 0 | tg128 | 2.96 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..d662fe6 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 16.77 ± 0.09 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 2.06 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..867aa2a --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 403.52 ± 2.05 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 21.37 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..41125bf --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 87.98 ± 0.28 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.34 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..1f41d31 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | pp512 | 947.24 ± 201.85 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 1 | 0 | tg128 | 33.23 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..717cb8c --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 91.69 ± 0.58 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.63 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..954e5a1 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 374.66 ± 1.20 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 23.59 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..cce6523 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 62.97 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.36 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..2657c15 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | pp512 | 731.49 ± 3.16 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 1 | 0 | tg128 | 8.42 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..ac43c79 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.94 ± 0.77 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 7.08 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..6b25238 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | pp512 | 334.32 ± 1.13 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 1 | 0 | tg128 | 16.01 ± 0.03 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..8ca4410 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 308.76 ± 0.39 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 15.60 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..e86faa7 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 1504.86 ± 5.67 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 67.86 ± 0.21 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..1470a47 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 214.08 ± 0.06 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.54 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..e371ccd --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | pp512 | 404.64 ± 1.49 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 1 | 0 | tg128 | 19.61 ± 0.03 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..2d39364 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 251.55 ± 9.38 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 16.51 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..c171b99 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 511.62 ± 3.03 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 23.68 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..599c971 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 381.40 ± 0.84 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 19.86 ± 3.06 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..7489d62 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 1380.26 ± 14.08 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 47.71 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..81afdf3 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 680.80 ± 1.05 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.70 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 40.13 ± 0.14 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..3dae2a9 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | pp512 | 739.92 ± 29.08 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 1 | 0 | tg128 | 29.61 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..27e4c62 --- /dev/null +++ b/benchmark/results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 563.74 ± 0.42 | +| qwen35moe 35B.A3B Q8_0 | 45.33 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 26.46 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..9172211 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 825.29 ± 28.88 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 22.95 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..f533e1b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 619.64 ± 9.10 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 20.37 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..eccdf91 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1615.54 ± 7.79 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 46.95 ± 0.08 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..e0c3111 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 692.91 ± 9.60 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 37.13 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..b78ff15 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | pp512 | 1510.77 ± 39.47 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 1 | 0 | tg128 | 41.68 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..fd6132f --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 683.73 ± 6.54 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 33.71 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..a323fa7 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 374.46 ± 0.56 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 3.40 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..93402ed --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 153.71 ± 1.55 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 3.17 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..528dac0 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 293.56 ± 0.37 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 10.51 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..2d739fe --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 136.26 ± 1.06 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 8.11 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..c3adea8 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | pp512 | 282.75 ± 0.35 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 1 | 0 | tg128 | 6.16 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..c197781 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 132.30 ± 1.20 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.26 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..6bbf5e1 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 954.68 ± 36.82 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 50.97 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..376176f --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 313.40 ± 10.22 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 35.98 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..5c9869d --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 2028.68 ± 6.34 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 72.58 ± 0.09 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..3ba6ef6 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 496.41 ± 2.25 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 51.87 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..c1c7e95 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | pp512 | 1519.53 ± 5.33 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 1 | 0 | tg128 | 50.57 ± 0.15 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..5fcb0c3 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 164.68 ± 0.77 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 5.64 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/docs/results.json b/docs/results.json index 2facb03..7931435 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-04-15T07:08:17Z", + "generated_at": "2026-04-15T10:39:02Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", @@ -12,6 +12,10 @@ "hash": "3f8752b55", "number": "8743" }, + { + "hash": "7957de9dc", + "number": "8645" + }, { "hash": "ff5ef8278", "number": "8763" @@ -20,6 +24,7 @@ "environments": [ "rocm-7_2", "rocm-7_2_1", + "rocm-7_2_1-pr21344", "rocm6_4_4", "rocm7-nightlies", "vulkan_amdvlk", @@ -28,6 +33,118 @@ "notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second" }, "runs": [ + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 70.42, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 2.96, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 16.77, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 2.06, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -526,6 +643,118 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 403.52, + "tps_std": 2.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.37, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 87.98, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.34, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -1111,6 +1340,118 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 947.24, + "tps_std": 201.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 33.23, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 91.69, + "tps_std": 0.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.63, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1640,6 +1981,118 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 374.66, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 94.93, + "name_params_b": 228.69, + "quant": "Q3_K_XL", + "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 94.93, + "name_params_b": 228.69, + "quant": "Q3_K_XL", + "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 62.97, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 94.93, + "name_params_b": 228.69, + "quant": "Q3_K_XL", + "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", + "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.36, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 94.93, + "name_params_b": 228.69, + "quant": "Q3_K_XL", + "log": "results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", @@ -2200,6 +2653,118 @@ "number": "8743" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 731.49, + "tps_std": 3.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 8.42, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 164.94, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 7.08, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -2698,6 +3263,118 @@ "number": "8743" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 334.32, + "tps_std": 1.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 16.01, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 308.76, + "tps_std": 0.39, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 15.6, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -3258,6 +3935,118 @@ "number": "8743" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1504.86, + "tps_std": 5.67, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 67.86, + "tps_std": 0.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 214.08, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.54, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -3818,6 +4607,118 @@ "number": "8743" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 404.64, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 19.61, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 251.55, + "tps_std": 9.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 16.51, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -4378,6 +5279,118 @@ "number": "8743" } }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 511.62, + "tps_std": 3.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.68, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 381.4, + "tps_std": 0.84, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.5-35B-A3B-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 19.86, + "tps_std": 3.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.5-35B-A3B-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.5-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.5-35B-A3B-BF16", @@ -4938,6 +5951,118 @@ "number": "8743" } }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1380.26, + "tps_std": 14.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 47.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 680.8, + "tps_std": 1.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 40.13, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.7, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.5-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q4_K_XL", @@ -5498,6 +6623,118 @@ "number": "8743" } }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 739.92, + "tps_std": 29.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 29.61, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 563.74, + "tps_std": 0.42, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 26.46, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 45.33, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.5-35B-A3B-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.5-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.5-35B-A3B-UD-Q8_K_XL", @@ -6058,6 +7295,118 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 825.29, + "tps_std": 28.88, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 619.64, + "tps_std": 9.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 20.37, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -6618,6 +7967,118 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1615.54, + "tps_std": 7.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 46.95, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 692.91, + "tps_std": 9.6, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 37.13, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -7178,6 +8639,118 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1510.77, + "tps_std": 39.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 41.68, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 683.73, + "tps_std": 6.54, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 33.71, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -7738,6 +9311,118 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 374.46, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 3.4, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 153.71, + "tps_std": 1.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 3.17, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -8236,6 +9921,118 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 293.56, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 10.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 136.26, + "tps_std": 1.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 8.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -8765,6 +10562,118 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 282.75, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 6.16, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 132.3, + "tps_std": 1.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.26, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -9294,6 +11203,118 @@ "number": "8743" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 954.68, + "tps_std": 36.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.97, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 313.4, + "tps_std": 10.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 35.98, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -9854,6 +11875,118 @@ "number": "8743" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 2028.68, + "tps_std": 6.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 72.58, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 496.41, + "tps_std": 2.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 51.87, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -10414,6 +12547,118 @@ "number": "8743" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 1519.53, + "tps_std": 5.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 50.57, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 164.68, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 5.64, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0",