diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..a4c2062 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 7.94 ± 0.04 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.49 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..80cc879 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.12 ± 0.03 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.48 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..ddb75a1 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.16 ± 0.08 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.58 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..5a74d0f --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 8.45 ± 0.02 | +| llama ?B Q4_K - Medium | 70.31 GiB | 125.03 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 1.43 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..2d1f68b --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f69acb1c465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f69acb1c83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7f69acb2ef19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f69ac8bfbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f69ac8a9d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f69ac8bfea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f69acbf128d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f69acce4410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f69acb38192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f69b07a6c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f69b07a9255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f69b07af98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f69b07b132e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x4038b9] +/lib64/libc.so.6(+0x35b5) [0x7f69ac5905b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f69ac590668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..9523902 --- /dev/null +++ b/benchmark/results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,25 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +radv/amdgpu: The CS has been cancelled because the context is lost. This context is innocent. +/lib64/libggml-base.so.0(+0x4465) [0x7fd3fbd2c465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fd3fbd2c83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7fd3fbd3ef19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fd3fbacfbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fd3fbab9d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fd3fbacfea8] +/lib64/libggml-vulkan.so.0(+0x1569b) [0x7fd3fbdff69b] +/lib64/libggml-vulkan.so.0(+0x14505a) [0x7fd3fbf2f05a] +/lib64/libggml-vulkan.so.0(+0x145c31) [0x7fd3fbf2fc31] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fd3fbd485d3] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fd3ff9b6c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fd3ff9b9255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fd3ff9bf98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fd3ff9c132e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x403a5b] +/lib64/libc.so.6(+0x35b5) [0x7fd3fb7a05b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fd3fb7a0668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_radv] Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..6d03d4a --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.54 ± 0.17 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.91 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..503a469 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.47 ± 0.09 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.63 ± 0.19 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..563fcbe --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 50.95 ± 0.07 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.15 ± 0.09 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..8f535c1 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 45.39 ± 0.36 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 11.22 ± 0.13 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..3dd0992 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,24 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fa5c507b465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa5c507b83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7fa5c508df19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa5c4e1ebfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa5c4e08d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fa5c4e1eea8] +/lib64/libggml-vulkan.so.0(+0x1569b) [0x7fa5c514e69b] +/lib64/libggml-vulkan.so.0(+0x14505a) [0x7fa5c527e05a] +/lib64/libggml-vulkan.so.0(+0x145c31) [0x7fa5c527ec31] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x7f3) [0x7fa5c50975d3] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa5c8d05c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa5c8d08255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa5c8d0e98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fa5c8d1032e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x403a5b] +/lib64/libc.so.6(+0x35b5) [0x7fa5c4aef5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa5c4aef668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] GLM-4.7-Flash-BF16-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..094219e --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 61.52 ± 0.01 | +| deepseek2 30B.A3B BF16 | 55.79 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 6.57 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..2445d7c --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.63 ± 0.04 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.98 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..0d48067 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 47.62 ± 0.15 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.87 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..8344413 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 51.79 ± 0.10 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.36 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..0d230ee --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 46.43 ± 0.14 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.43 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..8abe7d5 --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fb35126c465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fb35126c83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7fb35127ef19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fb35100fbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fb350ff9d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fb35100fea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fb35134128d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fb351434410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fb351288192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fb354ef6c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fb354ef9255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fb354eff98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fb354f0132e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x403a5b] +/lib64/libc.so.6(+0x35b5) [0x7fb350ce05b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fb350ce0668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] GLM-4.7-Flash-UD-Q8_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..7257eea --- /dev/null +++ b/benchmark/results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 72.80 ± 0.00 | +| deepseek2 30B.A3B Q8_0 | 32.70 GiB | 29.94 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 14.39 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..20d42cf --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,6 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +main: error: failed to load model '/home/kyuz0/models/mini-max-m2.7/UD-Q3_K_S/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003.gguf' +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +✖ ! [rocm-7_2_2-pr21344] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..9680e41 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,6 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +main: error: failed to load model '/home/kyuz0/models/mini-max-m2.7/UD-Q3_K_S/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003.gguf' +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +✖ ! [rocm-7_2_2] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..05901f6 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,3 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +✖ ! [rocm6_4_4] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..3dad4b5 --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,3 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +✖ ! [rocm7-nightlies] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..62960ea --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,3 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +✖ ! [vulkan_amdvlk] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..302dcdd --- /dev/null +++ b/benchmark/results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,3 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +✖ ! [vulkan_radv] MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..c268707 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 75.02 ± 1.56 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..9bdaeeb --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 80.08 ± 2.35 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..b1347cd --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 71.24 ± 0.32 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.09 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..3162148 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 73.23 ± 0.94 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 6.11 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..fe396cf --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f41f07d5465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f41f07d583b] +/lib64/libggml-base.so.0(+0x16f19) [0x7f41f07e7f19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f41f0578bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f41f0562d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f41f0578ea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f41f08aa28d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f41f099d410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f41f07f1192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f41f445fc70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f41f4462255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f41f446898f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f41f446a32e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x4038b9] +/lib64/libc.so.6(+0x35b5) [0x7f41f02495b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f41f0249668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] Ministral-3-14B-Instruct-2512-BF16__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..8439df5 --- /dev/null +++ b/benchmark/results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 42.48 ± 0.11 | +| mistral3 14B BF16 | 25.16 GiB | 13.51 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.81 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..fa48132 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 250.79 ± 0.32 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.03 ± 0.04 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..b578d21 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 249.66 ± 0.90 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.42 ± 0.07 | + +build: ab6120cde (8997) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..9198c57 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 290.67 ± 0.26 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.11 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..97ba25a --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 242.09 ± 0.33 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 15.47 ± 0.10 | + +build: ab6120cde (8997) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..42c0726 --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 85.09 ± 0.16 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.21 ± 0.04 | + +build: ab6120cde (8997) diff --git a/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..f07472e --- /dev/null +++ b/benchmark/results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 174.22 ± 0.19 | +| nemotron_h_moe 120B.A12B Q4_K - Medium | 78.02 GiB | 120.67 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.55 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..51d8bed --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 110.56 ± 0.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.59 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..8280acb --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 102.26 ± 0.13 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.53 ± 0.07 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..fa76777 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 159.40 ± 0.10 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 22.26 ± 0.17 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..ec90521 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 102.96 ± 0.05 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 23.26 ± 0.12 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..7744f9d --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 60.42 ± 0.04 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 17.44 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..210de70 --- /dev/null +++ b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 63.65 ± 0.38 | +| qwen3moe 30B.A3B Q4_K - Medium | 17.35 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 24.54 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..db2483b --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 170.79 ± 0.22 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.19 ± 0.06 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..4b7d92a --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 152.12 ± 0.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 14.21 ± 0.22 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..81ec5de --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 187.01 ± 3.13 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 16.91 ± 0.09 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..8c2d444 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 138.80 ± 0.17 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.26 ± 0.15 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..2e87f68 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 70.58 ± 0.04 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 16.94 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..51a6b87 --- /dev/null +++ b/benchmark/results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 156.41 ± 0.22 | +| qwen35moe 122B.A10B Q5_K - Medium | 85.60 GiB | 122.11 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 18.78 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..cea0bad --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 296.43 ± 1.09 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.60 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..fe7a028 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 264.06 ± 0.65 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.14 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..de6b985 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 326.10 ± 1.55 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.05 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..a164cdd --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 256.49 ± 1.26 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 20.93 ± 0.06 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..dd4c0ad --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 82.44 ± 0.12 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 10.40 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..4f99e4f --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 226.78 ± 1.49 | +| qwen35moe 35B.A3B BF16 | 64.60 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 10.05 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..983e08c --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 412.60 ± 0.33 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.90 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..3df355f --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 460.71 ± 1.18 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.40 ± 0.24 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..676bb76 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 473.84 ± 2.27 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 36.70 ± 0.97 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..222c0d8 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 432.39 ± 0.18 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 38.08 ± 0.59 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..122b511 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 183.42 ± 0.98 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 36.62 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..44d692e --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 507.32 ± 1.33 | +| qwen35moe 35B.A3B Q4_K - Medium | 20.81 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 42.75 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..6b183a9 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 414.31 ± 0.61 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.30 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..8f2da45 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 414.74 ± 1.74 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.74 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..39a638f --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 515.64 ± 0.13 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.53 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..464aae3 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 375.11 ± 2.14 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.02 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..9224ec2 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 175.02 ± 1.28 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 31.29 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..cc7bef5 --- /dev/null +++ b/benchmark/results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 468.83 ± 1.17 | +| qwen35moe 35B.A3B Q8_0 | 35.80 GiB | 34.66 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 35.46 ± 0.04 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..c5eeaa8 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 408.04 ± 3.58 | +| gemma4 ?B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.52 ± 0.01 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..5f077bd --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 403.29 ± 1.83 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.35 ± 0.19 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..898ec96 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 441.14 ± 2.12 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 17.76 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..95707ae --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 345.70 ± 0.13 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 19.16 ± 0.47 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..383b4a9 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 39.57 ± 0.04 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 14.10 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..034abc4 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 281.32 ± 0.70 | +| gemma4 26B.A4B BF16 | 47.02 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 13.15 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..2ce4c72 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 446.29 ± 0.87 | +| gemma4 ?B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 35.16 ± 0.03 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..1bb9c55 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 453.92 ± 5.18 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.85 ± 0.16 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..ccd7682 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 469.64 ± 3.17 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.31 ± 0.25 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..7a8df40 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 374.22 ± 0.13 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 34.77 ± 0.32 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..df37dff --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 54.90 ± 0.12 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 29.52 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..69589cc --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 445.00 ± 0.08 | +| gemma4 26B.A4B Q4_K - Medium | 15.90 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 36.64 ± 0.06 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..c26efe2 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 436.86 ± 3.98 | +| gemma4 ?B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 31.55 ± 0.72 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..dc7f2ce --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 452.10 ± 0.82 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 29.87 ± 3.28 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..c1f0b33 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 451.48 ± 1.29 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 30.76 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..9ee129b --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 363.83 ± 2.85 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 31.68 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..7fccbf2 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 54.04 ± 0.08 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 26.65 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..7f18752 --- /dev/null +++ b/benchmark/results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 415.94 ± 0.55 | +| gemma4 26B.A4B Q8_0 | 25.94 GiB | 25.23 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 31.99 ± 0.06 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..1ab9d0f --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 96.84 ± 0.57 | +| gemma4 ?B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.07 ± 0.02 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..61898ee --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 98.23 ± 0.26 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.08 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..3fdc9fe --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 101.08 ± 0.13 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 2.94 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..125b805 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 83.27 ± 0.13 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.04 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..6a066ed --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fa1d7fcc465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fa1d7fcc83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7fa1d7fdef19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fa1d7d6fbfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fa1d7d59d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fa1d7d6fea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fa1d80a128d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fa1d8194410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fa1d7fe8192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fa1dbc56c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fa1dbc59255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fa1dbc5f98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fa1dbc6132e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x4038b9] +/lib64/libc.so.6(+0x35b5) [0x7fa1d7a405b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fa1d7a40668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-BF16-00001-of-00002__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..8f3488f --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 45.96 ± 1.09 | +| gemma4 31B BF16 | 57.18 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 3.06 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..86912e3 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 91.86 ± 0.33 | +| gemma4 ?B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.51 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..94ec84e --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.57 ± 0.20 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.52 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..b9aae5c --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 96.98 ± 0.37 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.26 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..43b05b9 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 84.96 ± 0.07 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 7.56 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..7913e47 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7fbb0ecdb465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7fbb0ecdb83b] +/lib64/libggml-base.so.0(+0x16f19) [0x7fbb0ecedf19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7fbb0ea7ebfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7fbb0ea68d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7fbb0ea7eea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7fbb0edb028d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7fbb0eea3410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7fbb0ecf7192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7fbb12965c70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7fbb12968255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7fbb1296e98f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7fbb1297032e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x403a5b] +/lib64/libc.so.6(+0x35b5) [0x7fbb0e74f5b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7fbb0e74f668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-UD-Q4_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..f764fbf --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 76.03 ± 1.52 | +| gemma4 31B Q4_K - Medium | 17.46 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 7.78 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..d77f293 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 90.15 ± 0.22 | +| gemma4 ?B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.00 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..7d96053 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.92 ± 0.29 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 5.00 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..5ae9fb9 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 93.78 ± 0.36 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 4.95 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..f00e31f --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 81.58 ± 0.14 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 4.99 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..f4d2122 --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,23 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +/lib64/libggml-base.so.0(+0x4465) [0x7f2f37a93465] +/lib64/libggml-base.so.0(ggml_print_backtrace+0x1eb) [0x7f2f37a9383b] +/lib64/libggml-base.so.0(+0x16f19) [0x7f2f37aa5f19] +/lib64/libstdc++.so.6(+0x1ebfc) [0x7f2f37836bfc] +/lib64/libstdc++.so.6(_ZSt10unexpectedv+0x0) [0x7f2f37820d3a] +/lib64/libstdc++.so.6(+0x1eea8) [0x7f2f37836ea8] +/lib64/libggml-vulkan.so.0(+0x1728d) [0x7f2f37b6828d] +/lib64/libggml-vulkan.so.0(+0x10a410) [0x7f2f37c5b410] +/lib64/libggml-base.so.0(ggml_backend_sched_graph_compute_async+0x3b2) [0x7f2f37aaf192] +/lib64/libllama.so.0(_ZN13llama_context13graph_computeEP11ggml_cgraphb+0xa0) [0x7f2f3b71dc70] +/lib64/libllama.so.0(_ZN13llama_context14process_ubatchERK12llama_ubatch14llm_graph_typeP22llama_memory_context_iR11ggml_status+0xe5) [0x7f2f3b720255] +/lib64/libllama.so.0(_ZN13llama_context6decodeERK11llama_batch+0x35f) [0x7f2f3b72698f] +/lib64/libllama.so.0(llama_decode+0xe) [0x7f2f3b72832e] +/usr/sbin/llama-bench() [0x40663b] +/usr/sbin/llama-bench() [0x403a5b] +/lib64/libc.so.6(+0x35b5) [0x7f2f375075b5] +/lib64/libc.so.6(__libc_start_main+0x88) [0x7f2f37507668] +/usr/sbin/llama-bench() [0x404e65] +terminate called after throwing an instance of 'vk::DeviceLostError' + what(): vk::Queue::submit: ErrorDeviceLost +✖ ! [vulkan_amdvlk] gemma-4-31B-it-UD-Q8_K_XL__fa1 __longctx65536 failed (exit 0) diff --git a/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..7ae88af --- /dev/null +++ b/benchmark/results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 71.68 ± 0.77 | +| gemma4 31B Q8_0 | 32.60 GiB | 30.70 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 5.02 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..76a7342 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 175.99 ± 0.26 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 27.44 ± 0.31 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..97ab7bd --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 175.32 ± 0.50 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 24.52 ± 5.66 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..f636f42 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 273.87 ± 0.30 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 32.17 ± 0.38 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..dba40ef --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 177.27 ± 0.45 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 27.69 ± 0.13 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..1aedfef --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 129.83 ± 0.04 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 26.17 ± 0.07 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..3dd3243 --- /dev/null +++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 139.61 ± 0.56 | +| gpt-oss 120B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 35.00 ± 0.05 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..07c8d4a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 277.16 ± 1.25 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.39 ± 0.04 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..b3b5c05 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 275.76 ± 1.52 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.22 ± 0.34 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..879519f --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 440.23 ± 0.27 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 47.51 ± 0.29 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..af03959 --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 277.70 ± 0.33 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 40.28 ± 0.41 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..3c9107e --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 210.72 ± 0.07 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 38.53 ± 0.01 | + +build: ab6120cde (8997) diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..ee2ba5a --- /dev/null +++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 229.28 ± 0.91 | +| gpt-oss 20B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 50.14 ± 0.11 | + +build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log new file mode 100644 index 0000000..a44a4d5 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 72.65 ± 0.35 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.00 ± 0.00 | + +build: 7957de9dc (8645) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log new file mode 100644 index 0000000..859f3de --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 72.92 ± 0.24 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.00 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log new file mode 100644 index 0000000..3d8bc49 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 83.57 ± 0.98 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 3.74 ± 0.02 | + +build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log new file mode 100644 index 0000000..c32cd3d --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_cuda_init: found 1 ROCm devices (Total VRAM: 126976 MiB): + Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32, VRAM: 126976 MiB +| model | size | params | backend | ngl | n_ubatch | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -------: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | pp2048 @ d65536 | 85.33 ± 0.30 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | ROCm | 99 | 2048 | 1 | 0 | tg32 @ d65536 | 2.90 ± 0.03 | + +build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log new file mode 100644 index 0000000..9685755 --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 89.90 ± 0.68 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 4.61 ± 0.00 | + +build: ab6120cde (8997) diff --git a/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log new file mode 100644 index 0000000..6e478db --- /dev/null +++ b/benchmark/results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log @@ -0,0 +1,8 @@ +ggml_vulkan: Found 1 Vulkan devices: +ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat +| model | size | params | backend | ngl | fa | mmap | test | t/s | +| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | pp2048 @ d65536 | 104.40 ± 1.78 | +| llama 7B Q4_0 | 3.56 GiB | 6.74 B | Vulkan | 99 | 1 | 0 | tg32 @ d65536 | 4.60 ± 0.00 | + +build: ab6120cde (8997) diff --git a/docs/results.json b/docs/results.json index 214ddb4..32da874 100644 --- a/docs/results.json +++ b/docs/results.json @@ -1,6 +1,6 @@ { "meta": { - "generated_at": "2026-04-29T05:45:18Z", + "generated_at": "2026-05-03T15:19:28Z", "system_info": { "distro": "Fedora Linux 43 (Workstation Edition)", "kernel": "6.19.9-200.fc43.x86_64", @@ -16,6 +16,10 @@ "hash": "7957de9dc", "number": "8645" }, + { + "hash": "ab6120cde", + "number": "8997" + }, { "hash": "f53577432", "number": "8942" @@ -145,6 +149,62 @@ "number": "8645" } }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 7.94, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 1.49, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -257,6 +317,62 @@ "number": "8942" } }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 8.12, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 1.48, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -369,6 +485,62 @@ "number": "8743" } }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 8.16, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 1.58, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -481,6 +653,62 @@ "number": "8743" } }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 8.45, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 1.43, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 125.03, + "file_size_gib": 70.31, + "name_params_b": 125.03, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -531,6 +759,31 @@ "rpc": false, "build": null }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 123.0, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", @@ -643,6 +896,31 @@ "number": "8743" } }, + { + "model": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002", + "model_clean": "Devstral-2-123B-Instruct-2512-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 123.0, + "quant": "Q4_K_XL", + "log": "results/Devstral-2-123B-Instruct-2512-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -755,6 +1033,62 @@ "number": "8645" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 46.54, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 11.91, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -836,6 +1170,62 @@ "number": "8942" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 46.47, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 11.63, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -973,6 +1363,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 50.95, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 11.15, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -1085,6 +1531,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 45.39, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 11.22, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -1197,6 +1699,31 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "GLM-4.7-Flash-BF16-00001-of-00002", "model_clean": "GLM-4.7-Flash-BF16", @@ -1309,6 +1836,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 61.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-BF16-00001-of-00002", + "model_clean": "GLM-4.7-Flash-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.57, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 55.79, + "name_params_b": 29.94, + "quant": "BF16", + "log": "results/GLM-4.7-Flash-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1421,6 +2004,62 @@ "number": "8645" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 47.63, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.98, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1533,6 +2172,62 @@ "number": "8942" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 47.62, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.87, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1645,6 +2340,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 51.79, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.36, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1757,6 +2508,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 46.43, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.43, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1838,6 +2645,31 @@ "rpc": false, "build": null }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "GLM-4.7-Flash-UD-Q8_K_XL", "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", @@ -1950,6 +2782,62 @@ "number": "8743" } }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 72.8, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "GLM-4.7-Flash-UD-Q8_K_XL", + "model_clean": "GLM-4.7-Flash-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.39, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 29.94, + "file_size_gib": 32.7, + "name_params_b": 29.94, + "quant": "Q8_K_XL", + "log": "results/GLM-4.7-Flash-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2062,6 +2950,31 @@ "number": "8645" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2174,6 +3087,31 @@ "number": "8942" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "load", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2286,6 +3224,31 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2398,6 +3361,31 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2510,6 +3498,31 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", "model_clean": "MiniMax-M2.7-UD-Q3_K_S", @@ -2622,6 +3635,31 @@ "number": "8743" } }, + { + "model": "MiniMax-M2.7-UD-Q3_K_S-00001-of-00003", + "model_clean": "MiniMax-M2.7-UD-Q3_K_S", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": null, + "quant": "Q3_K_S", + "log": "results/MiniMax-M2.7-UD-Q3_K_S-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "MiniMax-M2.7-UD-Q3_K_XL-00001-of-00004", "model_clean": "MiniMax-M2.7-UD-Q3_K_XL", @@ -3182,6 +4220,62 @@ "number": "8645" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 75.02, + "tps_std": 1.56, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -3294,6 +4388,62 @@ "number": "8942" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 80.08, + "tps_std": 2.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -3406,6 +4556,62 @@ "number": "8743" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 71.24, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.09, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -3518,6 +4724,62 @@ "number": "8743" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 73.23, + "tps_std": 0.94, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 6.11, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -3568,6 +4830,31 @@ "rpc": false, "build": null }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 14.0, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "Ministral-3-14B-Instruct-2512-BF16", "model_clean": "Ministral-3-14B-Instruct-2512-BF16", @@ -3680,6 +4967,62 @@ "number": "8743" } }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 42.48, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Ministral-3-14B-Instruct-2512-BF16", + "model_clean": "Ministral-3-14B-Instruct-2512-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.81, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 13.51, + "file_size_gib": 25.16, + "name_params_b": 13.51, + "quant": "BF16", + "log": "results/Ministral-3-14B-Instruct-2512-BF16__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -3792,6 +5135,62 @@ "number": "8645" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 250.79, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.03, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -3904,6 +5303,62 @@ "number": "8942" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 249.66, + "tps_std": 0.9, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.42, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -4016,6 +5471,62 @@ "number": "8743" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 290.67, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.11, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -4128,6 +5639,62 @@ "number": "8743" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 242.09, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 15.47, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -4240,6 +5807,62 @@ "number": "8743" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 85.09, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 13.21, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", @@ -4352,6 +5975,62 @@ "number": "8743" } }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 174.22, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003", + "model_clean": "NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 13.55, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 120.67, + "file_size_gib": 78.02, + "name_params_b": 120.67, + "quant": "Q4_K_XL", + "log": "results/NVIDIA-Nemotron-3-Super-120B-A12B-UD-Q4_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -4464,6 +6143,62 @@ "number": "8645" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 110.56, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.59, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -4576,6 +6311,62 @@ "number": "8942" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 102.26, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.53, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -4688,6 +6479,62 @@ "number": "8743" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 159.4, + "tps_std": 0.1, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 22.26, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -4800,6 +6647,62 @@ "number": "8743" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 102.96, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 23.26, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -4912,6 +6815,62 @@ "number": "8743" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 60.42, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.44, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", @@ -5024,6 +6983,62 @@ "number": "8743" } }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 63.65, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "model_clean": "Qwen3-Coder-30B-A3B-Instruct-Q4_K_M", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 24.54, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.53, + "file_size_gib": 17.35, + "name_params_b": 30.53, + "quant": "Q4_K_M", + "log": "results/Qwen3-Coder-30B-A3B-Instruct-Q4_K_M__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5136,6 +7151,62 @@ "number": "8645" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 170.79, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.19, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5248,6 +7319,62 @@ "number": "8942" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 152.12, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.21, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5360,6 +7487,62 @@ "number": "8743" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 187.01, + "tps_std": 3.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 16.91, + "tps_std": 0.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5472,6 +7655,62 @@ "number": "8743" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 138.8, + "tps_std": 0.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.26, + "tps_std": 0.15, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5584,6 +7823,62 @@ "number": "8743" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 70.58, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 16.94, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", @@ -5696,6 +7991,62 @@ "number": "8743" } }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 156.41, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003", + "model_clean": "Qwen3.5-122B-A10B-UD-Q5_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 18.78, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 122.11, + "file_size_gib": 85.6, + "name_params_b": 122.11, + "quant": "Q5_K_XL", + "log": "results/Qwen3.5-122B-A10B-UD-Q5_K_XL-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -5808,6 +8159,62 @@ "number": "8645" } }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 296.43, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 19.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -5920,6 +8327,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 264.06, + "tps_std": 0.65, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 20.14, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -6032,6 +8495,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 326.1, + "tps_std": 1.55, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 19.05, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -6082,6 +8601,62 @@ "rpc": false, "build": null }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 256.49, + "tps_std": 1.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 20.93, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -6194,6 +8769,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 82.44, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 10.4, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", "model_clean": "Qwen3.6-35B-A3B-BF16", @@ -6306,6 +8937,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 226.78, + "tps_std": 1.49, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-BF16-00001-of-00002", + "model_clean": "Qwen3.6-35B-A3B-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 10.05, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 64.6, + "name_params_b": 34.66, + "quant": "BF16", + "log": "results/Qwen3.6-35B-A3B-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6418,6 +9105,62 @@ "number": "8645" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 412.6, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.9, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6530,6 +9273,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 460.71, + "tps_std": 1.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.4, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6642,6 +9441,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 473.84, + "tps_std": 2.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 36.7, + "tps_std": 0.97, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6754,6 +9609,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 432.39, + "tps_std": 0.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 38.08, + "tps_std": 0.59, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6866,6 +9777,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 183.42, + "tps_std": 0.98, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 36.62, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", @@ -6978,6 +9945,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 507.32, + "tps_std": 1.33, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 42.75, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 20.81, + "name_params_b": 34.66, + "quant": "Q4_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7090,6 +10113,62 @@ "number": "8645" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 414.31, + "tps_std": 0.61, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.3, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7202,6 +10281,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 414.74, + "tps_std": 1.74, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.74, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7314,6 +10449,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 515.64, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.53, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7426,6 +10617,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 375.11, + "tps_std": 2.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.02, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7538,6 +10785,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 175.02, + "tps_std": 1.28, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 31.29, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", @@ -7650,6 +10953,62 @@ "number": "8942" } }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 468.83, + "tps_std": 1.17, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "model_clean": "Qwen3.6-35B-A3B-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.46, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 34.66, + "file_size_gib": 35.8, + "name_params_b": 34.66, + "quant": "Q8_K_XL", + "log": "results/Qwen3.6-35B-A3B-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -7762,6 +11121,62 @@ "number": "8645" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 408.04, + "tps_std": 3.58, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 19.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -7874,6 +11289,62 @@ "number": "8942" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 403.29, + "tps_std": 1.83, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 19.35, + "tps_std": 0.19, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -7986,6 +11457,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 441.14, + "tps_std": 2.12, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 17.76, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -8098,6 +11625,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 345.7, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 19.16, + "tps_std": 0.47, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -8210,6 +11793,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 39.57, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 14.1, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", "model_clean": "gemma-4-26B-A4B-it-BF16", @@ -8322,6 +11961,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 281.32, + "tps_std": 0.7, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-26B-A4B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 13.15, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 47.02, + "name_params_b": 25.23, + "quant": "BF16", + "log": "results/gemma-4-26B-A4B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8434,6 +12129,62 @@ "number": "8645" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 446.29, + "tps_std": 0.87, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.16, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8546,6 +12297,62 @@ "number": "8942" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 453.92, + "tps_std": 5.18, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.85, + "tps_std": 0.16, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8658,6 +12465,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 469.64, + "tps_std": 3.17, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.31, + "tps_std": 0.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8770,6 +12633,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 374.22, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 34.77, + "tps_std": 0.32, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8882,6 +12801,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 54.9, + "tps_std": 0.12, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 29.52, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", @@ -8994,6 +12969,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 445.0, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 36.64, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 15.9, + "name_params_b": 25.23, + "quant": "Q4_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9106,6 +13137,62 @@ "number": "8645" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 436.86, + "tps_std": 3.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 31.55, + "tps_std": 0.72, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9218,6 +13305,62 @@ "number": "8942" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 452.1, + "tps_std": 0.82, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 29.87, + "tps_std": 3.28, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9330,6 +13473,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 451.48, + "tps_std": 1.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 30.76, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9442,6 +13641,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 363.83, + "tps_std": 2.85, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 31.68, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9554,6 +13809,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 54.04, + "tps_std": 0.08, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 26.65, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", @@ -9666,6 +13977,62 @@ "number": "8743" } }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 415.94, + "tps_std": 0.55, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-26B-A4B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 31.99, + "tps_std": 0.06, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 25.23, + "file_size_gib": 25.94, + "name_params_b": 25.23, + "quant": "Q8_K_XL", + "log": "results/gemma-4-26B-A4B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -9778,6 +14145,62 @@ "number": "8645" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 96.84, + "tps_std": 0.57, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.07, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -9890,6 +14313,62 @@ "number": "8942" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 98.23, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.08, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -10002,6 +14481,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 101.08, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 2.94, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -10114,6 +14649,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 83.27, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.04, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -10164,6 +14755,31 @@ "rpc": false, "build": null }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "gemma-4-31B-it-BF16-00001-of-00002", "model_clean": "gemma-4-31B-it-BF16", @@ -10276,6 +14892,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 45.96, + "tps_std": 1.09, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-BF16-00001-of-00002", + "model_clean": "gemma-4-31B-it-BF16", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.06, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 57.18, + "name_params_b": 30.7, + "quant": "BF16", + "log": "results/gemma-4-31B-it-BF16-00001-of-00002__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10388,6 +15060,62 @@ "number": "8645" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 91.86, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.51, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10500,6 +15228,62 @@ "number": "8942" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 93.57, + "tps_std": 0.2, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.52, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10612,6 +15396,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 96.98, + "tps_std": 0.37, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.26, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10724,6 +15564,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 84.96, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.56, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10805,6 +15701,31 @@ "rpc": false, "build": null }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "gemma-4-31B-it-UD-Q4_K_XL", "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", @@ -10917,6 +15838,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 76.03, + "tps_std": 1.52, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q4_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q4_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 7.78, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 17.46, + "name_params_b": 30.7, + "quant": "Q4_K_XL", + "log": "results/gemma-4-31B-it-UD-Q4_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11029,6 +16006,62 @@ "number": "8645" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 90.15, + "tps_std": 0.22, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11141,6 +16174,62 @@ "number": "8942" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 93.92, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11253,6 +16342,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 93.78, + "tps_std": 0.36, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 4.95, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11365,6 +16510,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 81.58, + "tps_std": 0.14, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 4.99, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11446,6 +16647,31 @@ "rpc": false, "build": null }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": null, + "tps_mean": null, + "tps_std": null, + "error": true, + "error_type": "runtime", + "backend": null, + "ngl": null, + "mmap": null, + "params_b": null, + "file_size_gib": null, + "name_params_b": 31.0, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": null + }, { "model": "gemma-4-31B-it-UD-Q8_K_XL", "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", @@ -11558,6 +16784,62 @@ "number": "8743" } }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 71.68, + "tps_std": 0.77, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gemma-4-31B-it-UD-Q8_K_XL", + "model_clean": "gemma-4-31B-it-UD-Q8_K_XL", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 5.02, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 30.7, + "file_size_gib": 32.6, + "name_params_b": 30.7, + "quant": "Q8_K_XL", + "log": "results/gemma-4-31B-it-UD-Q8_K_XL__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -11670,6 +16952,62 @@ "number": "8645" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 175.99, + "tps_std": 0.26, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 27.44, + "tps_std": 0.31, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -11782,6 +17120,62 @@ "number": "8942" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 175.32, + "tps_std": 0.5, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 24.52, + "tps_std": 5.66, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -11894,6 +17288,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 273.87, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 32.17, + "tps_std": 0.38, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -12006,6 +17456,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 177.27, + "tps_std": 0.45, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 27.69, + "tps_std": 0.13, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -12118,6 +17624,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 129.83, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 26.17, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-120b-mxfp4-00001-of-00003", "model_clean": "gpt-oss-120b-mxfp4", @@ -12230,6 +17792,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 139.61, + "tps_std": 0.56, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-120b-mxfp4-00001-of-00003", + "model_clean": "gpt-oss-120b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 35.0, + "tps_std": 0.05, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 116.83, + "file_size_gib": 59.02, + "name_params_b": 116.83, + "quant": "MXFP4", + "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12342,6 +17960,62 @@ "number": "8645" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 277.16, + "tps_std": 1.25, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 40.39, + "tps_std": 0.04, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12454,6 +18128,62 @@ "number": "8942" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 275.76, + "tps_std": 1.52, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 40.22, + "tps_std": 0.34, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12566,6 +18296,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 440.23, + "tps_std": 0.27, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 47.51, + "tps_std": 0.29, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12678,6 +18464,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 277.7, + "tps_std": 0.33, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 40.28, + "tps_std": 0.41, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12790,6 +18632,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 210.72, + "tps_std": 0.07, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 38.53, + "tps_std": 0.01, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "gpt-oss-20b-mxfp4", "model_clean": "gpt-oss-20b-mxfp4", @@ -12902,6 +18800,62 @@ "number": "8743" } }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 229.28, + "tps_std": 0.91, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "gpt-oss-20b-mxfp4", + "model_clean": "gpt-oss-20b-mxfp4", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 50.14, + "tps_std": 0.11, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 20.91, + "file_size_gib": 11.27, + "name_params_b": 20.91, + "quant": "MXFP4", + "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13014,6 +18968,62 @@ "number": "8645" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 72.65, + "tps_std": 0.35, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2-pr21344", + "env_base": "rocm", + "env_variant": "7_2_2-pr21344", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2-pr21344__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "7957de9dc", + "number": "8645" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13126,6 +19136,62 @@ "number": "8942" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 72.92, + "tps_std": 0.24, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm-7_2_2", + "env_base": "rocm", + "env_variant": "7_2_2", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.0, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm-7_2_2__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13238,6 +19304,62 @@ "number": "8743" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 83.57, + "tps_std": 0.98, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm6_4_4", + "env_base": "rocm6_4_4", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 3.74, + "tps_std": 0.02, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm6_4_4__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13350,6 +19472,62 @@ "number": "8743" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 85.33, + "tps_std": 0.3, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "rocm7-nightlies", + "env_base": "rocm7", + "env_variant": "nightlies", + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 2.9, + "tps_std": 0.03, + "error": false, + "error_type": null, + "backend": "ROCm", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__rocm7-nightlies__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13462,6 +19640,62 @@ "number": "8743" } }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 89.9, + "tps_std": 0.68, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_amdvlk", + "env_base": "vulkan_amdvlk", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 4.61, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_amdvlk__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, { "model": "llama-2-7b.Q4_0", "model_clean": "llama-2-7b.Q4_0", @@ -13573,6 +19807,62 @@ "hash": "3f8752b55", "number": "8743" } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "pp2048 @ d65536", + "tps_mean": 104.4, + "tps_std": 1.78, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } + }, + { + "model": "llama-2-7b.Q4_0", + "model_clean": "llama-2-7b.Q4_0", + "env": "vulkan_radv", + "env_base": "vulkan_radv", + "env_variant": null, + "fa": true, + "context": "longctx65536", + "context_tokens": 65536, + "test": "tg32 @ d65536", + "tps_mean": 4.6, + "tps_std": 0.0, + "error": false, + "error_type": null, + "backend": "Vulkan", + "ngl": 99, + "mmap": 0, + "params_b": 6.74, + "file_size_gib": 3.56, + "name_params_b": 6.74, + "quant": "Q4_0", + "log": "results/llama-2-7b.Q4_0__vulkan_radv__fa1__longctx65536.log", + "rpc": false, + "build": { + "hash": "ab6120cde", + "number": "8997" + } } ] } \ No newline at end of file