From 9016c0f8f8fe65eb679f825a6b73de3efb7c110e Mon Sep 17 00:00:00 2001 From: Donato Capitella Date: Wed, 15 Apr 2026 16:54:34 +0100 Subject: [PATCH] update benchs --- ...0001-of-00003__rocm-7_2_1-pr21344__fa1.log | 8 + ..._rocm-7_2_1-pr21344__fa1__longctx32768.log | 8 + ...Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log | 8 + ...f-00003__rocm-7_2_1__fa1__longctx32768.log | 8 + ...-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log | 8 + ...of-00003__rocm6_4_4__fa1__longctx32768.log | 8 + ...S-00001-of-00003__rocm7-nightlies__fa1.log | 8 + ...03__rocm7-nightlies__fa1__longctx32768.log | 8 + ...K_S-00001-of-00003__vulkan_amdvlk__fa1.log | 8 + ...0003__vulkan_amdvlk__fa1__longctx32768.log | 8 + ...3_K_S-00001-of-00003__vulkan_radv__fa1.log | 8 + ...-00003__vulkan_radv__fa1__longctx32768.log | 8 + ...0001-of-00004__rocm-7_2_1-pr21344__fa1.log | 4 +- ..._rocm-7_2_1-pr21344__fa1__longctx32768.log | 4 +- ...3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log | 4 +- ...f-00004__rocm-7_2_1__fa1__longctx32768.log | 4 +- ...Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log | 4 +- ...of-00004__rocm6_4_4__fa1__longctx32768.log | 4 +- ...L-00001-of-00004__rocm7-nightlies__fa1.log | 4 +- ...04__rocm7-nightlies__fa1__longctx32768.log | 4 +- ..._XL-00001-of-00004__vulkan_amdvlk__fa1.log | 4 +- ...0004__vulkan_amdvlk__fa1__longctx32768.log | 4 +- ..._K_XL-00001-of-00004__vulkan_radv__fa1.log | 2 +- ...-00004__vulkan_radv__fa1__longctx32768.log | 4 +- docs/results.json | 762 ++++++++++++++++-- 25 files changed, 836 insertions(+), 68 deletions(-) create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log create mode 100644 benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log new file mode 100644 index 0000000..f015431 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 358.12 ± 0.68 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 23.00 ± 0.11 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log new file mode 100644 index 0000000..d31910f --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.57 ± 0.16 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.16 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log new file mode 100644 index 0000000..3e48901 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 238.25 ± 1.14 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.98 ± 0.12 | + +build: ff5ef8278 (8763) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log new file mode 100644 index 0000000..9f15e49 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.19 ± 1.14 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.16 ± 0.02 | + +build: ff5ef8278 (8763) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log new file mode 100644 index 0000000..322f9e6 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 236.56 ± 1.44 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 21.55 ± 0.00 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log new file mode 100644 index 0000000..ffb9df3 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 77.34 ± 1.21 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.54 ± 0.09 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log new file mode 100644 index 0000000..19ff545 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 225.65 ± 0.79 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 21.44 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log new file mode 100644 index 0000000..1c157f4 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 64.10 ± 0.50 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.35 ± 0.28 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log new file mode 100644 index 0000000..e820993 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 202.08 ± 0.31 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 24.94 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log new file mode 100644 index 0000000..d513da2 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 31.48 ± 0.09 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.27 ± 0.01 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log new file mode 100644 index 0000000..b06e345 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 231.25 ± 0.79 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 30.70 ± 0.02 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log new file mode 100644 index 0000000..cecf7a1 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 32.00 ± 0.03 | +| minimax-m2 230B.A10B Q3_K - Small | 87.20 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.47 ± 0.04 | + +build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log index 954e5a1..e4c9a5f 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 374.66 ± 1.20 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 23.59 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 376.88 ± 1.23 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 24.00 ± 0.10 | build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log index cce6523..59b2482 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1-pr21344__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 62.97 ± 0.05 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.36 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 63.91 ± 0.23 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.21 ± 0.26 | build: 7957de9dc (8645) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log index 4cd2596..f95fa32 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.94 ± 1.48 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 24.11 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.64 ± 1.36 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 24.09 ± 0.06 | build: ff5ef8278 (8763) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log index 243dd87..fed4fc6 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm-7_2_1__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 66.09 ± 0.53 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.32 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 66.16 ± 0.41 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.33 ± 0.06 | build: ff5ef8278 (8763) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log index 3f5143a..6caf1c2 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.63 ± 1.62 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.63 ± 0.07 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 245.87 ± 1.58 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.72 ± 0.01 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log index 2282246..bed6c9f 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm6_4_4__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 79.52 ± 0.47 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.69 ± 0.15 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 81.20 ± 1.25 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.69 ± 0.12 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log index 41d8882..4f7dba8 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 237.43 ± 0.96 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.63 ± 0.05 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | pp512 | 238.13 ± 1.15 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 1 | 0 | tg128 | 22.54 ± 0.27 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log index 2090c73..24d6e3f 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__rocm7-nightlies__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB | model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 61.53 ± 0.27 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.52 ± 0.17 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d32768 | 61.41 ± 0.44 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d32768 | 6.64 ± 0.15 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log index a893a6a..192b97a 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 203.53 ± 0.70 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 24.06 ± 0.03 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 203.65 ± 1.00 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 24.07 ± 0.02 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log index 3cd97d8..621b928 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_amdvlk__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 31.94 ± 0.07 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.05 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 31.64 ± 0.19 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 10.06 ± 0.00 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log index 0f184ff..366fe9b 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 234.24 ± 1.27 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp512 | 230.72 ± 8.67 | | minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg128 | 29.48 ± 0.01 | build: 3f8752b55 (8743) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log index 8cb49dc..9c5cc93 100644 --- a/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004__vulkan_radv__fa1__longctx32768.log @@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices: ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat | model | size | params | backend | ngl | fa | mmap | test | t/s | | ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 32.31 ± 0.16 | -| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.20 ± 0.01 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | pp2048 @ d32768 | 32.06 ± 0.16 | +| minimax-m2 230B.A10B Q3_K - Medium | 94.93 GiB | 228.69 B | Vulkan | 99 | 1 | 0 | tg32 @ d32768 | 14.12 ± 0.02 | build: 3f8752b55 (8743) diff --git a/docs/results.json b/docs/results.json index 7931435..2575442 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-04-15T10:39:02Z", + "generated_at": "2026-04-15T15:54:23Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", @@ -1981,6 +1981,678 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 358.12, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 23.0, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 63.57, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1-pr21344", + "env_base": "rocm", + "env_variant": "7_2_1-pr21344", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.16, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1-pr21344__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 238.25, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "ff5ef8278", + "number": "8763" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 22.98, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1.log", + "rpc": false, + "build": { + "hash": "ff5ef8278", + "number": "8763" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 63.19, + "tps_std": 1.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "ff5ef8278", + "number": "8763" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_1", + "env_base": "rocm", + "env_variant": "7_2_1", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.16, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_1__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "ff5ef8278", + "number": "8763" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 236.56, + "tps_std": 1.44, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.55, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 77.34, + "tps_std": 1.21, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.54, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 225.65, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 21.44, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 64.1, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 6.35, + "tps_std": 0.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 202.08, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 24.94, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 31.48, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 10.27, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "pp512", + "tps_mean": 231.25, + "tps_std": 0.79, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "default", + "context_tokens": null, + "test": "tg128", + "tps_mean": 30.7, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "pp2048 @ d32768", + "tps_mean": 32.0, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx32768", + "context_tokens": 32768, + "test": "tg32 @ d32768", + "tps_mean": 14.47, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 228.69, + "file_size_gib": 87.2, + "name_params_b": 228.69, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx32768.log", + "rpc": false, + "build": { + "hash": "3f8752b55", + "number": "8743" + } + }, { "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", @@ -1991,8 +2663,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 374.66, - "tps_std": 1.2, + "tps_mean": 376.88, + "tps_std": 1.23, "error": false, "error_type": null, "backend": "ROCm", @@ -2019,8 +2691,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 23.59, - "tps_std": 0.01, + "tps_mean": 24.0, + "tps_std": 0.1, "error": false, "error_type": null, "backend": "ROCm", @@ -2047,8 +2719,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 62.97, - "tps_std": 0.05, + "tps_mean": 63.91, + "tps_std": 0.23, "error": false, "error_type": null, "backend": "ROCm", @@ -2075,8 +2747,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.36, - "tps_std": 0.01, + "tps_mean": 6.21, + "tps_std": 0.26, "error": false, "error_type": null, "backend": "ROCm", @@ -2103,8 +2775,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 245.94, - "tps_std": 1.48, + "tps_mean": 245.64, + "tps_std": 1.36, "error": false, "error_type": null, "backend": "ROCm", @@ -2131,8 +2803,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 24.11, - "tps_std": 0.05, + "tps_mean": 24.09, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -2159,8 +2831,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 66.09, - "tps_std": 0.53, + "tps_mean": 66.16, + "tps_std": 0.41, "error": false, "error_type": null, "backend": "ROCm", @@ -2187,8 +2859,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.32, - "tps_std": 0.05, + "tps_mean": 6.33, + "tps_std": 0.06, "error": false, "error_type": null, "backend": "ROCm", @@ -2215,8 +2887,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 245.63, - "tps_std": 1.62, + "tps_mean": 245.87, + "tps_std": 1.58, "error": false, "error_type": null, "backend": "ROCm", @@ -2243,8 +2915,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.63, - "tps_std": 0.07, + "tps_mean": 22.72, + "tps_std": 0.01, "error": false, "error_type": null, "backend": "ROCm", @@ -2271,8 +2943,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 79.52, - "tps_std": 0.47, + "tps_mean": 81.2, + "tps_std": 1.25, "error": false, "error_type": null, "backend": "ROCm", @@ -2300,7 +2972,7 @@ "context_tokens": 32768, "test": "tg32 @ d32768", "tps_mean": 6.69, - "tps_std": 0.15, + "tps_std": 0.12, "error": false, "error_type": null, "backend": "ROCm", @@ -2327,8 +2999,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 237.43, - "tps_std": 0.96, + "tps_mean": 238.13, + "tps_std": 1.15, "error": false, "error_type": null, "backend": "ROCm", @@ -2355,8 +3027,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 22.63, - "tps_std": 0.05, + "tps_mean": 22.54, + "tps_std": 0.27, "error": false, "error_type": null, "backend": "ROCm", @@ -2383,8 +3055,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 61.53, - "tps_std": 0.27, + "tps_mean": 61.41, + "tps_std": 0.44, "error": false, "error_type": null, "backend": "ROCm", @@ -2411,8 +3083,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 6.52, - "tps_std": 0.17, + "tps_mean": 6.64, + "tps_std": 0.15, "error": false, "error_type": null, "backend": "ROCm", @@ -2439,8 +3111,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 203.53, - "tps_std": 0.7, + "tps_mean": 203.65, + "tps_std": 1.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -2467,8 +3139,8 @@ "context": "default", "context_tokens": null, "test": "tg128", - "tps_mean": 24.06, - "tps_std": 0.03, + "tps_mean": 24.07, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan", @@ -2495,8 +3167,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 31.94, - "tps_std": 0.07, + "tps_mean": 31.64, + "tps_std": 0.19, "error": false, "error_type": null, "backend": "Vulkan", @@ -2523,8 +3195,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 10.05, - "tps_std": 0.01, + "tps_mean": 10.06, + "tps_std": 0.0, "error": false, "error_type": null, "backend": "Vulkan", @@ -2551,8 +3223,8 @@ "context": "default", "context_tokens": null, "test": "pp512", - "tps_mean": 234.24, - "tps_std": 1.27, + "tps_mean": 230.72, + "tps_std": 8.67, "error": false, "error_type": null, "backend": "Vulkan", @@ -2607,7 +3279,7 @@ "context": "longctx32768", "context_tokens": 32768, "test": "pp2048 @ d32768", - "tps_mean": 32.31, + "tps_mean": 32.06, "tps_std": 0.16, "error": false, "error_type": null, @@ -2635,8 +3307,8 @@ "context": "longctx32768", "context_tokens": 32768, "test": "tg32 @ d32768", - "tps_mean": 14.2, - "tps_std": 0.01, + "tps_mean": 14.12, + "tps_std": 0.02, "error": false, "error_type": null, "backend": "Vulkan",