From 62e5080102f65faef53d9875103cbbc017a3e16e Mon Sep 17 00:00:00 2001
From: Donato Capitella
Date: Sun, 17 Aug 2025 08:53:16 +0100
Subject: [PATCH] Updated benchmarks
---
README.md | 1 -
benchmark/generate_results.json.py | 2 +-
...K_XL-00001-of-00002__rocm6_4_2-rocwmma.log | 2 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 8 +-
...r-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log | 6 +-
...Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 8 +-
...K_XL-00001-of-00002__rocm6_4_3-rocwmma.log | 15 +
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 8 +-
...01-of-00002__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} | 8 +-
...r-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log | 15 +
...Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log | 10 +
...K_XL-00001-of-00002__rocm6_4_3__hblt0.log} | 4 +-
...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +
...-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log | 10 -
...K_XL-00001-of-00002__rocm7_beta__hblt0.log | 10 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
...ir-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 6 +-
...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 6 +-
...4_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 8 +-
...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +-
...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +-
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +-
...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 6 +-
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...K_XL-00001-of-00003__rocm6_4_2-rocwmma.log | 8 +-
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 2 +-
...r-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log | 8 +-
...Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log | 8 +-
..._XL-00001-of-00003__rocm6_4_3-rocwmma.log} | 11 +-
...0001-of-00003__rocm6_4_3-rocwmma__fa1.log} | 8 +-
...001-of-00003__rocm6_4_3-rocwmma__hblt0.log | 6 +
...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +
...-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log} | 11 +-
...6_K_XL-00001-of-00003__rocm6_4_3__fa1.log} | 4 +-
...K_XL-00001-of-00003__rocm6_4_3__hblt0.log} | 4 +-
...00001-of-00003__rocm6_4_3__hblt0__fa1.log} | 4 +-
...-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log | 10 -
...K_XL-00001-of-00003__rocm7_beta__hblt0.log | 10 -
...00001-of-00003__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 7 +-
...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...ir-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log | 6 +-
...-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log | 6 +-
...6_K_XL-00001-of-00003__rocm7_rc__hblt0.log | 7 +-
...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 1 +
...-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log | 6 +-
..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +-
...UD-Q6_K_XL-00001-of-00003__vulkan_radv.log | 6 +-
..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +-
...Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 6 -
...-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log | 10 -
...8_K_XL-00001-of-00002__rocm7_beta__fa1.log | 6 -
...K_XL-00001-of-00002__rocm7_beta__hblt0.log | 6 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 10 -
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 -
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 5 -
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 5 -
...2B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log | 10 -
...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 5 -
...8_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 5 -
...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 5 -
...-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...UD-Q8_K_XL-00001-of-00002__vulkan_radv.log | 8 -
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 -
...K_XL-00001-of-00002__rocm6_4_2-rocwmma.log | 2 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 2 +-
...t-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log | 8 +-
...Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 6 +-
..._XL-00001-of-00002__rocm6_4_3-rocwmma.log} | 11 +-
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...001-of-00002__rocm6_4_3-rocwmma__hblt0.log | 6 +
...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +
...t-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log | 15 +
...8_K_XL-00001-of-00002__rocm6_4_3__fa1.log} | 6 +-
..._K_XL-00001-of-00002__rocm6_4_3__hblt0.log | 6 +
...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +
...-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log | 10 -
...8_K_XL-00001-of-00002__rocm7_beta__fa1.log | 6 -
...K_XL-00001-of-00002__rocm7_beta__hblt0.log | 6 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 7 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...ct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log | 6 +-
...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 2 +-
...8_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 7 +-
...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +-
...-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +-
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +-
...UD-Q8_K_XL-00001-of-00002__vulkan_radv.log | 6 +-
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log | 8 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 2 +-
...nstruct-Q6_K-00001-of-00002__rocm6_4_2.log | 8 +-
...ct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log | 2 +-
...Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log | 11 +
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 8 +-
...01-of-00002__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...f-00002__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +
...nstruct-Q6_K-00001-of-00002__rocm6_4_3.log | 15 +
...t-Q6_K-00001-of-00002__rocm6_4_3__fa1.log} | 4 +-
...Q6_K-00001-of-00002__rocm6_4_3__hblt0.log} | 4 +-
...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +
...struct-Q6_K-00001-of-00002__rocm7_beta.log | 6 -
...t-Q6_K-00001-of-00002__rocm7_beta__fa1.log | 6 -
...Q6_K-00001-of-00002__rocm7_beta__hblt0.log | 6 -
...-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log | 8 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 7 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...Instruct-Q6_K-00001-of-00002__rocm7_rc.log | 6 +-
...uct-Q6_K-00001-of-00002__rocm7_rc__fa1.log | 8 +-
...t-Q6_K-00001-of-00002__rocm7_rc__hblt0.log | 6 +-
...K-00001-of-00002__rocm7_rc__hblt0__fa1.log | 1 +
...uct-Q6_K-00001-of-00002__vulkan_amdvlk.log | 6 +-
...6_K-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +-
...truct-Q6_K-00001-of-00002__vulkan_radv.log | 6 +-
...-Q6_K-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log | 8 +-
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 2 +-
...nstruct-Q8_0-00001-of-00003__rocm6_4_2.log | 8 +-
...ct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log | 2 +-
...8_0-00001-of-00003__rocm6_4_3-rocwmma.log} | 11 +-
...0001-of-00003__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...01-of-00003__rocm6_4_3-rocwmma__hblt0.log} | 4 +-
...-00003__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
...nstruct-Q8_0-00001-of-00003__rocm6_4_3.log | 11 +
...t-Q8_0-00001-of-00003__rocm6_4_3__fa1.log} | 4 +-
...-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log | 6 +
...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 6 +
...struct-Q8_0-00001-of-00003__rocm7_beta.log | 10 -
...t-Q8_0-00001-of-00003__rocm7_beta__fa1.log | 6 -
...Q8_0-00001-of-00003__rocm7_beta__hblt0.log | 6 -
...00001-of-00003__rocm7_beta__hblt0__fa1.log | 6 -
...-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 7 +-
...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...Instruct-Q8_0-00001-of-00003__rocm7_rc.log | 6 +-
...uct-Q8_0-00001-of-00003__rocm7_rc__fa1.log | 1 +
...t-Q8_0-00001-of-00003__rocm7_rc__hblt0.log | 7 +-
...0-00001-of-00003__rocm7_rc__hblt0__fa1.log | 1 +
...uct-Q8_0-00001-of-00003__vulkan_amdvlk.log | 6 +-
...8_0-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +-
...truct-Q8_0-00001-of-00003__vulkan_radv.log | 6 +-
...-Q8_0-00001-of-00003__vulkan_radv__fa1.log | 6 +-
...K_XL-00001-of-00002__rocm6_4_2-rocwmma.log | 8 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 2 +-
...t-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log | 8 +-
...Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 8 +-
...K_XL-00001-of-00002__rocm6_4_3-rocwmma.log | 15 +
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 8 +-
...01-of-00002__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} | 8 +-
...t-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log | 15 +
...4_K_XL-00001-of-00002__rocm6_4_3__fa1.log} | 8 +-
...K_XL-00001-of-00002__rocm6_4_3__hblt0.log} | 4 +-
...-00001-of-00002__rocm6_4_3__hblt0__fa1.log | 6 +
...-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log | 10 -
...K_XL-00001-of-00002__rocm7_beta__hblt0.log | 6 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00002__rocm7_rc-rocwmma.log | 7 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...ct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 6 +-
...-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log | 1 +
...4_K_XL-00001-of-00002__rocm7_rc__hblt0.log | 7 +-
...L-00001-of-00002__rocm7_rc__hblt0__fa1.log | 7 +-
...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 6 +-
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +-
...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 6 +-
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...K_XL-00001-of-00003__rocm6_4_2-rocwmma.log | 2 +-
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 2 +-
...7-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log | 8 +-
...Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log | 8 +-
..._XL-00001-of-00003__rocm6_4_3-rocwmma.log} | 11 +-
...00001-of-00003__rocm6_4_3-rocwmma__fa1.log | 10 +
...01-of-00003__rocm6_4_3-rocwmma__hblt0.log} | 4 +-
...f-00003__rocm6_4_3-rocwmma__hblt0__fa1.log | 6 +
...7-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log | 15 +
...3_K_XL-00001-of-00003__rocm6_4_3__fa1.log} | 4 +-
..._K_XL-00001-of-00003__rocm6_4_3__hblt0.log | 6 +
...-00001-of-00003__rocm6_4_3__hblt0__fa1.log | 6 +
...-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log | 6 -
...3_K_XL-00001-of-00003__rocm7_beta__fa1.log | 7 -
...K_XL-00001-of-00003__rocm7_beta__hblt0.log | 6 -
...00001-of-00003__rocm7_beta__hblt0__fa1.log | 6 -
..._K_XL-00001-of-00003__rocm7_rc-rocwmma.log | 8 +-
...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 7 +-
...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...07-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log | 6 +-
...-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log | 9 +-
...3_K_XL-00001-of-00003__rocm7_rc__hblt0.log | 7 +-
...L-00001-of-00003__rocm7_rc__hblt0__fa1.log | 1 +
...-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log | 6 +-
..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +-
...UD-Q3_K_XL-00001-of-00003__vulkan_radv.log | 6 +-
..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 6 +-
...BF16-00001-of-00002__rocm6_4_2-rocwmma.log | 6 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 +-
...30B-A3B-BF16-00001-of-00002__rocm6_4_2.log | 6 +-
...3B-BF16-00001-of-00002__rocm6_4_2__fa1.log | 6 +-
...F16-00001-of-00002__rocm6_4_3-rocwmma.log} | 11 +-
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...01-of-00002__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
...0B-A3B-BF16-00001-of-00002__rocm6_4_3.log} | 11 +-
...B-BF16-00001-of-00002__rocm6_4_3__fa1.log} | 6 +-
...BF16-00001-of-00002__rocm6_4_3__hblt0.log} | 6 +-
...00001-of-00002__rocm6_4_3__hblt0__fa1.log} | 6 +-
...0B-A3B-BF16-00001-of-00002__rocm7_beta.log | 10 -
...B-BF16-00001-of-00002__rocm7_beta__fa1.log | 6 -
...BF16-00001-of-00002__rocm7_beta__hblt0.log | 10 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 10 -
...-BF16-00001-of-00002__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
...-30B-A3B-BF16-00001-of-00002__rocm7_rc.log | 6 +-
...A3B-BF16-00001-of-00002__rocm7_rc__fa1.log | 1 +
...B-BF16-00001-of-00002__rocm7_rc__hblt0.log | 6 +-
...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 8 +-
...A3B-BF16-00001-of-00002__vulkan_amdvlk.log | 6 +-
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 6 +-
...B-A3B-BF16-00001-of-00002__vulkan_radv.log | 6 +-
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...uct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log | 6 +-
...507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log | 6 +-
...3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log | 6 +-
...struct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log | 6 +-
...uct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log | 15 +
...07-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
...3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log | 15 +
...truct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log} | 8 +-
...uct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log} | 6 +-
...507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log} | 8 +-
...B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log | 10 -
...uct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log | 10 -
...ruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log | 6 +-
...2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log | 6 +-
...07-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log | 6 +-
...-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
...A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log | 6 +-
...nstruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log | 6 +-
...truct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log | 6 +-
...-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log | 6 +-
...nstruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log | 6 +-
...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 6 +-
...-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log | 6 +-
...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 6 +-
...ct-BF16-00001-of-00002__rocm6_4_2__fa1.log | 6 -
...struct-BF16-00001-of-00002__rocm7_beta.log | 10 -
...t-BF16-00001-of-00002__rocm7_beta__fa1.log | 6 -
...BF16-00001-of-00002__rocm7_beta__hblt0.log | 10 -
...00001-of-00002__rocm7_beta__hblt0__fa1.log | 10 -
...-BF16-00001-of-00002__rocm7_rc-rocwmma.log | 10 -
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 10 -
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 10 -
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 10 -
...Instruct-BF16-00001-of-00002__rocm7_rc.log | 10 -
...uct-BF16-00001-of-00002__rocm7_rc__fa1.log | 10 -
...t-BF16-00001-of-00002__rocm7_rc__hblt0.log | 10 -
...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 10 -
...uct-BF16-00001-of-00002__vulkan_amdvlk.log | 8 -
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...truct-BF16-00001-of-00002__vulkan_radv.log | 8 -
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 -
...3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log | 6 +-
...-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log | 6 +-
.../gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log | 6 +-
...ma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log | 6 +-
...3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log | 15 +
...it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
.../gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log | 15 +
...a-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log} | 8 +-
...3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log} | 6 +-
...-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log} | 8 +-
.../gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log | 10 -
...3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log | 10 -
...-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log | 6 +-
...b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log | 6 +-
...it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log | 6 +-
...-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
.../gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log | 6 +-
...mma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log | 6 +-
...a-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log | 6 +-
...2b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log | 6 +-
...mma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log | 6 +-
...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 6 +-
...gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log | 6 +-
...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 6 +-
...BF16-00001-of-00002__rocm6_4_2-rocwmma.log | 6 +-
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 +-
...-27b-it-BF16-00001-of-00002__rocm6_4_2.log | 6 +-
...it-BF16-00001-of-00002__rocm6_4_2__fa1.log | 6 +-
...BF16-00001-of-00002__rocm6_4_3-rocwmma.log | 15 +
...0001-of-00002__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...01-of-00002__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
...-27b-it-BF16-00001-of-00002__rocm6_4_3.log | 15 +
...t-BF16-00001-of-00002__rocm6_4_3__fa1.log} | 8 +-
...BF16-00001-of-00002__rocm6_4_3__hblt0.log} | 8 +-
...00001-of-00002__rocm6_4_3__hblt0__fa1.log} | 8 +-
...27b-it-BF16-00001-of-00002__rocm7_beta.log | 10 -
...BF16-00001-of-00002__rocm7_beta__hblt0.log | 10 -
...-BF16-00001-of-00002__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00002__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00002__rocm7_rc-rocwmma__hblt0.log | 6 +-
...of-00002__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
...3-27b-it-BF16-00001-of-00002__rocm7_rc.log | 6 +-
...-it-BF16-00001-of-00002__rocm7_rc__fa1.log | 6 +-
...t-BF16-00001-of-00002__rocm7_rc__hblt0.log | 6 +-
...6-00001-of-00002__rocm7_rc__hblt0__fa1.log | 7 +-
...-it-BF16-00001-of-00002__vulkan_amdvlk.log | 2 +-
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 2 +-
...7b-it-BF16-00001-of-00002__vulkan_radv.log | 6 +-
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 6 +-
...emma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log | 6 +-
...3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm6_4_2.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log | 6 +-
...emma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log | 15 +
...-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm6_4_3.log | 15 +
.../gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log} | 8 +-
...emma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log} | 6 +-
...3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log} | 8 +-
.../gemma-3-4b-it-Q3_K_S__rocm7_beta.log | 10 -
...emma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log | 10 -
...3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log | 10 -
...gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log | 6 +-
...-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log | 6 +-
...-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log | 6 +-
...t-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm7_rc.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log | 6 +-
...a-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log | 6 +-
...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 6 +-
.../gemma-3-4b-it-Q3_K_S__vulkan_radv.log | 6 +-
...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 6 +-
.../gpt-oss-120b-F16__rocm6_4_2-rocwmma.log | 6 +-
...t-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log | 8 +-
.../results/gpt-oss-120b-F16__rocm6_4_2.log | 6 +-
.../gpt-oss-120b-F16__rocm6_4_2__fa1.log | 6 +-
.../gpt-oss-120b-F16__rocm6_4_3-rocwmma.log | 15 +
...-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...ss-120b-F16__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...0b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log} | 8 +-
.../results/gpt-oss-120b-F16__rocm6_4_3.log | 15 +
.../gpt-oss-120b-F16__rocm6_4_3__fa1.log} | 6 +-
.../gpt-oss-120b-F16__rocm6_4_3__hblt0.log} | 6 +-
...t-oss-120b-F16__rocm6_4_3__hblt0__fa1.log} | 8 +-
.../results/gpt-oss-120b-F16__rocm7_beta.log | 10 -
.../gpt-oss-120b-F16__rocm7_beta__hblt0.log | 10 -
...t-oss-120b-F16__rocm7_beta__hblt0__fa1.log | 10 -
.../gpt-oss-120b-F16__rocm7_rc-rocwmma.log | 6 +-
...pt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log | 6 +-
...-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log | 6 +-
...120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
.../results/gpt-oss-120b-F16__rocm7_rc.log | 6 +-
.../gpt-oss-120b-F16__rocm7_rc__fa1.log | 6 +-
.../gpt-oss-120b-F16__rocm7_rc__hblt0.log | 6 +-
...gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log | 6 +-
.../gpt-oss-120b-F16__vulkan_amdvlk.log | 6 +-
.../gpt-oss-120b-F16__vulkan_amdvlk__fa1.log | 6 +-
.../results/gpt-oss-120b-F16__vulkan_radv.log | 6 +-
.../gpt-oss-120b-F16__vulkan_radv__fa1.log | 6 +-
...xfp4-00001-of-00003__rocm6_4_2-rocwmma.log | 6 +-
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 8 +-
...s-120b-mxfp4-00001-of-00003__rocm6_4_2.log | 2 +-
...b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log | 6 +-
...xfp4-00001-of-00003__rocm6_4_3-rocwmma.log | 15 +
...0001-of-00003__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...01-of-00003__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-00003__rocm6_4_3-rocwmma__hblt0__fa1.log} | 4 +-
...s-120b-mxfp4-00001-of-00003__rocm6_4_3.log | 15 +
...-mxfp4-00001-of-00003__rocm6_4_3__fa1.log} | 6 +-
...xfp4-00001-of-00003__rocm6_4_3__hblt0.log} | 6 +-
...00001-of-00003__rocm6_4_3__hblt0__fa1.log} | 8 +-
...-120b-mxfp4-00001-of-00003__rocm7_beta.log | 10 -
...xfp4-00001-of-00003__rocm7_beta__hblt0.log | 6 -
...00001-of-00003__rocm7_beta__hblt0__fa1.log | 10 -
...mxfp4-00001-of-00003__rocm7_rc-rocwmma.log | 6 +-
...-00001-of-00003__rocm7_rc-rocwmma__fa1.log | 6 +-
...0001-of-00003__rocm7_rc-rocwmma__hblt0.log | 6 +-
...of-00003__rocm7_rc-rocwmma__hblt0__fa1.log | 7 +-
...ss-120b-mxfp4-00001-of-00003__rocm7_rc.log | 6 +-
...0b-mxfp4-00001-of-00003__rocm7_rc__fa1.log | 6 +-
...-mxfp4-00001-of-00003__rocm7_rc__hblt0.log | 6 +-
...4-00001-of-00003__rocm7_rc__hblt0__fa1.log | 6 +-
...0b-mxfp4-00001-of-00003__vulkan_amdvlk.log | 6 +-
...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 6 +-
...120b-mxfp4-00001-of-00003__vulkan_radv.log | 6 +-
...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 6 +-
.../gpt-oss-20b-F32__rocm6_4_2-rocwmma.log | 6 +-
...pt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log | 6 +-
.../results/gpt-oss-20b-F32__rocm6_4_2.log | 6 +-
.../gpt-oss-20b-F32__rocm6_4_2__fa1.log | 6 +-
.../gpt-oss-20b-F32__rocm6_4_3-rocwmma.log | 15 +
...t-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...0b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
.../results/gpt-oss-20b-F32__rocm6_4_3.log | 15 +
...og => gpt-oss-20b-F32__rocm6_4_3__fa1.log} | 8 +-
.../gpt-oss-20b-F32__rocm6_4_3__hblt0.log} | 6 +-
...pt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log} | 8 +-
.../results/gpt-oss-20b-F32__rocm7_beta.log | 10 -
.../gpt-oss-20b-F32__rocm7_beta__hblt0.log | 10 -
.../gpt-oss-20b-F32__rocm7_rc-rocwmma.log | 6 +-
...gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log | 6 +-
...t-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log | 6 +-
...-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
.../results/gpt-oss-20b-F32__rocm7_rc.log | 6 +-
.../gpt-oss-20b-F32__rocm7_rc__fa1.log | 6 +-
.../gpt-oss-20b-F32__rocm7_rc__hblt0.log | 6 +-
.../gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log | 6 +-
.../gpt-oss-20b-F32__vulkan_amdvlk.log | 6 +-
.../gpt-oss-20b-F32__vulkan_amdvlk__fa1.log | 6 +-
.../results/gpt-oss-20b-F32__vulkan_radv.log | 6 +-
.../gpt-oss-20b-F32__vulkan_radv__fa1.log | 6 +-
.../gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log | 6 +-
...-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log | 6 +-
.../results/gpt-oss-20b-mxfp4__rocm6_4_2.log | 6 +-
.../gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log | 6 +-
.../gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log | 15 +
...oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log} | 6 +-
...s-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log} | 6 +-
...-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log} | 6 +-
.../results/gpt-oss-20b-mxfp4__rocm6_4_3.log | 15 +
... => gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log} | 8 +-
.../gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log} | 6 +-
...-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log} | 8 +-
.../results/gpt-oss-20b-mxfp4__rocm7_beta.log | 10 -
.../gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log | 10 -
.../gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log | 6 +-
...t-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log | 6 +-
...oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log | 6 +-
...0b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log | 6 +-
.../results/gpt-oss-20b-mxfp4__rocm7_rc.log | 6 +-
.../gpt-oss-20b-mxfp4__rocm7_rc__fa1.log | 6 +-
.../gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log | 6 +-
...pt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log | 6 +-
.../gpt-oss-20b-mxfp4__vulkan_amdvlk.log | 6 +-
.../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 6 +-
.../gpt-oss-20b-mxfp4__vulkan_radv.log | 6 +-
.../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 6 +-
.../llama3.3-70.6B-Q4_K_M__rocm6_4_2.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_beta.log | 10 -
...llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log | 10 -
...ama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log | 6 -
...3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log | 10 -
...lama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log | 10 -
....3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log | 10 -
...-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log | 10 -
...B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_rc.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log | 10 -
...llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log | 5 -
...3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log | 5 -
.../llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log | 8 -
...ma3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log | 8 -
.../llama3.3-70.6B-Q4_K_M__vulkan_radv.log | 8 -
...lama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log | 8 -
benchmark/results/run_benchmarks.log | 1392 -
...Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 6 -
...-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log | 6 -
...4_K_XL-00001-of-00002__rocm7_beta__fa1.log | 6 -
...ir-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 10 -
...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 8 -
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 -
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 6 -
...-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log | 10 -
...6_K_XL-00001-of-00003__rocm7_beta__fa1.log | 6 -
...ir-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log | 10 -
...-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log | 5 -
...-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 -
...UD-Q6_K_XL-00001-of-00003__vulkan_radv.log | 8 -
..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 -
...K_XL-00001-of-00002__rocm6_4_2-rocwmma.log | 6 -
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 -
...B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log | 6 -
...-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log | 6 -
...8_K_XL-00001-of-00002__rocm7_beta__fa1.log | 6 -
...2B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log | 10 -
...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 10 -
...-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...UD-Q8_K_XL-00001-of-00002__vulkan_radv.log | 8 -
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 -
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 -
...-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log | 6 -
...8_K_XL-00001-of-00002__rocm7_beta__fa1.log | 6 -
...ct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log | 5 -
...-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log | 5 -
...-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...UD-Q8_K_XL-00001-of-00002__vulkan_radv.log | 8 -
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 -
...Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log | 6 -
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 -
...struct-Q6_K-00001-of-00002__rocm7_beta.log | 6 -
...t-Q6_K-00001-of-00002__rocm7_beta__fa1.log | 6 -
...Instruct-Q6_K-00001-of-00002__rocm7_rc.log | 10 -
...uct-Q6_K-00001-of-00002__rocm7_rc__fa1.log | 5 -
...uct-Q6_K-00001-of-00002__vulkan_amdvlk.log | 8 -
...6_K-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...truct-Q6_K-00001-of-00002__vulkan_radv.log | 8 -
...-Q6_K-00001-of-00002__vulkan_radv__fa1.log | 8 -
...Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log | 6 -
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 6 -
...struct-Q8_0-00001-of-00003__rocm7_beta.log | 6 -
...t-Q8_0-00001-of-00003__rocm7_beta__fa1.log | 6 -
...Instruct-Q8_0-00001-of-00003__rocm7_rc.log | 5 -
...uct-Q8_0-00001-of-00003__rocm7_rc__fa1.log | 5 -
...uct-Q8_0-00001-of-00003__vulkan_amdvlk.log | 8 -
...8_0-00001-of-00003__vulkan_amdvlk__fa1.log | 8 -
...truct-Q8_0-00001-of-00003__vulkan_radv.log | 8 -
...-Q8_0-00001-of-00003__vulkan_radv__fa1.log | 8 -
...K_XL-00001-of-00002__rocm6_4_2-rocwmma.log | 6 -
...00001-of-00002__rocm6_4_2-rocwmma__fa1.log | 6 -
...Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log | 6 -
...-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log | 10 -
...ct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log | 5 -
...-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...UD-Q4_K_XL-00001-of-00002__vulkan_radv.log | 8 -
..._K_XL-00001-of-00002__vulkan_radv__fa1.log | 8 -
...K_XL-00001-of-00003__rocm6_4_2-rocwmma.log | 6 -
...00001-of-00003__rocm6_4_2-rocwmma__fa1.log | 6 -
...-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log | 6 -
...3_K_XL-00001-of-00003__rocm7_beta__fa1.log | 6 -
...07-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log | 5 -
...-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log | 5 -
...-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log | 8 -
..._XL-00001-of-00003__vulkan_amdvlk__fa1.log | 8 -
...UD-Q3_K_XL-00001-of-00003__vulkan_radv.log | 8 -
..._K_XL-00001-of-00003__vulkan_radv__fa1.log | 8 -
...0B-A3B-BF16-00001-of-00002__rocm7_beta.log | 10 -
...B-BF16-00001-of-00002__rocm7_beta__fa1.log | 10 -
...-30B-A3B-BF16-00001-of-00002__rocm7_rc.log | 10 -
...A3B-BF16-00001-of-00002__rocm7_rc__fa1.log | 10 -
...A3B-BF16-00001-of-00002__vulkan_amdvlk.log | 8 -
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...B-A3B-BF16-00001-of-00002__vulkan_radv.log | 8 -
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 -
...B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log | 10 -
...truct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log | 10 -
...A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log | 10 -
...nstruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log | 10 -
...nstruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log | 8 -
...ct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log | 8 -
...-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log | 8 -
...ruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log | 8 -
...BF16-00001-of-00002__rocm6_4_2-rocwmma.log | 10 -
...nstruct-BF16-00001-of-00002__rocm6_4_2.log | 10 -
...struct-BF16-00001-of-00002__rocm7_beta.log | 10 -
...t-BF16-00001-of-00002__rocm7_beta__fa1.log | 6 -
...Instruct-BF16-00001-of-00002__rocm7_rc.log | 10 -
...uct-BF16-00001-of-00002__rocm7_rc__fa1.log | 5 -
...uct-BF16-00001-of-00002__vulkan_amdvlk.log | 8 -
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...truct-BF16-00001-of-00002__vulkan_radv.log | 8 -
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 -
.../gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log | 10 -
...a-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log | 10 -
.../gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log | 10 -
...mma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log | 10 -
...mma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log | 8 -
...-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log | 8 -
...gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log | 8 -
...-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log | 8 -
...-27b-it-BF16-00001-of-00002__rocm6_4_2.log | 6 -
...t-BF16-00001-of-00002__rocm7_beta__fa1.log | 10 -
...3-27b-it-BF16-00001-of-00002__rocm7_rc.log | 10 -
...-it-BF16-00001-of-00002__rocm7_rc__fa1.log | 10 -
...-it-BF16-00001-of-00002__vulkan_amdvlk.log | 8 -
...F16-00001-of-00002__vulkan_amdvlk__fa1.log | 8 -
...7b-it-BF16-00001-of-00002__vulkan_radv.log | 8 -
...-BF16-00001-of-00002__vulkan_radv__fa1.log | 8 -
.../gemma-3-4b-it-Q3_K_S__rocm7_beta.log | 10 -
.../gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log | 10 -
.../gemma-3-4b-it-Q3_K_S__rocm7_rc.log | 10 -
.../gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log | 8 -
...mma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log | 8 -
.../gemma-3-4b-it-Q3_K_S__vulkan_radv.log | 8 -
...gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log | 8 -
.../gpt-oss-120b-F16__rocm7_beta.log | 10 -
.../gpt-oss-120b-F16__rocm7_rc.log | 10 -
.../gpt-oss-120b-F16__rocm7_rc__fa1.log | 10 -
.../gpt-oss-120b-F16__vulkan_amdvlk.log | 8 -
.../gpt-oss-120b-F16__vulkan_amdvlk__fa1.log | 8 -
.../gpt-oss-120b-F16__vulkan_radv.log | 8 -
.../gpt-oss-120b-F16__vulkan_radv__fa1.log | 8 -
...-120b-mxfp4-00001-of-00003__rocm7_beta.log | 10 -
...-mxfp4-00001-of-00003__rocm7_beta__fa1.log | 10 -
...ss-120b-mxfp4-00001-of-00003__rocm7_rc.log | 10 -
...0b-mxfp4-00001-of-00003__rocm7_rc__fa1.log | 5 -
...0b-mxfp4-00001-of-00003__vulkan_amdvlk.log | 8 -
...fp4-00001-of-00003__vulkan_amdvlk__fa1.log | 8 -
...120b-mxfp4-00001-of-00003__vulkan_radv.log | 8 -
...mxfp4-00001-of-00003__vulkan_radv__fa1.log | 8 -
.../gpt-oss-20b-F32__rocm7_beta.log | 10 -
.../gpt-oss-20b-F32__rocm7_beta__fa1.log | 10 -
.../gpt-oss-20b-F32__rocm7_rc.log | 10 -
.../gpt-oss-20b-F32__rocm7_rc__fa1.log | 10 -
.../gpt-oss-20b-F32__vulkan_amdvlk.log | 8 -
.../gpt-oss-20b-F32__vulkan_amdvlk__fa1.log | 8 -
.../gpt-oss-20b-F32__vulkan_radv.log | 8 -
.../gpt-oss-20b-F32__vulkan_radv__fa1.log | 8 -
.../gpt-oss-20b-mxfp4__rocm7_beta.log | 10 -
.../gpt-oss-20b-mxfp4__rocm7_beta__fa1.log | 10 -
.../gpt-oss-20b-mxfp4__rocm7_rc.log | 10 -
.../gpt-oss-20b-mxfp4__rocm7_rc__fa1.log | 10 -
.../gpt-oss-20b-mxfp4__vulkan_amdvlk.log | 8 -
.../gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log | 8 -
.../gpt-oss-20b-mxfp4__vulkan_radv.log | 8 -
.../gpt-oss-20b-mxfp4__vulkan_radv__fa1.log | 8 -
...ama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log | 10 -
...3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm6_4_2.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_beta.log | 10 -
...llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_rc.log | 10 -
.../llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log | 10 -
.../llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log | 8 -
...ma3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log | 8 -
.../llama3.3-70.6B-Q4_K_M__vulkan_radv.log | 8 -
...lama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log | 8 -
.../results_08-08-2025/run_benchmarks.log | 1153 -
benchmark/run_benchmarks.sh | 7 +-
docs/index.html | 3 +-
docs/results.json | 21614 ++++++++--------
657 files changed, 12622 insertions(+), 16283 deletions(-)
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log => results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (68%)
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log => results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/results/{GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log => GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} (68%)
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log => results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log} (63%)
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log => results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log} (54%)
rename benchmark/results/{GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log} (69%)
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
create mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log => results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log} (54%)
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log => results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log} (54%)
rename benchmark/results/{llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log} (58%)
rename benchmark/results/{Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log => GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log} (57%)
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
rename benchmark/{results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log => results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log} (54%)
rename benchmark/{results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log => results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (79%)
create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log
rename benchmark/results/{llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log => Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log} (62%)
create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
create mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log
rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log => Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (68%)
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log => results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log} (79%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log} (53%)
rename benchmark/{results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log} (56%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log => results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log} (54%)
rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log} (61%)
rename benchmark/{results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log} (55%)
rename benchmark/results/{Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log} (62%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log} (53%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
rename benchmark/results/{Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (67%)
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log => results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log} (78%)
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} (68%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
rename benchmark/{results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log => results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log} (67%)
rename benchmark/results/{Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log => Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log} (55%)
create mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
rename benchmark/{results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log => results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log} (54%)
create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
rename benchmark/{results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log => results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log} (54%)
create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log
rename benchmark/{results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log => results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log} (53%)
create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
create mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log} (54%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} (79%)
rename benchmark/results/{Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log => Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log} (54%)
rename benchmark/{results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log} (78%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log => results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log} (79%)
delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log => results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log => results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log => results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log} (78%)
create mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log
rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log} (68%)
rename benchmark/{results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log => results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log => Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log} (68%)
delete mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
delete mode 100644 benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log => results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log => results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log => results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log} (79%)
create mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log
rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log => gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log} (69%)
rename benchmark/{results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log => results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log => gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log} (69%)
delete mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
delete mode 100644 benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log
create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log => results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log => results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log => results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log} (79%)
create mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log
rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log => gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log} (69%)
rename benchmark/{results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log => results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log} (69%)
rename benchmark/results/{gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log => gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log} (69%)
delete mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log
create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log => results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log} (78%)
rename benchmark/{results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log => results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log => results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log} (78%)
create mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log
rename benchmark/{results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log => results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log} (69%)
rename benchmark/{results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log => results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log => gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log} (68%)
delete mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
delete mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log => results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log => results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log => results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log} (69%)
create mode 100644 benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log
rename benchmark/{results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log => results/gpt-oss-120b-F16__rocm6_4_3__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log => results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gpt-oss-120b-F16__rocm7_beta__fa1.log => gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log} (69%)
delete mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_beta.log
delete mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log => results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log => results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/results/{llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log} (57%)
create mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log
rename benchmark/{results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log => results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log} (78%)
rename benchmark/{results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log => results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log => gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log} (68%)
delete mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log
create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log => results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log} (78%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log => results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log => results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log} (79%)
create mode 100644 benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log
rename benchmark/results/{gpt-oss-20b-F32__rocm7_beta__fa1.log => gpt-oss-20b-F32__rocm6_4_3__fa1.log} (69%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log => results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log => gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log} (69%)
delete mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_beta.log
delete mode 100644 benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log
create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log
rename benchmark/{results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log => results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log => results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log} (79%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log => results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log} (78%)
create mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log
rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_beta__fa1.log => gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log} (69%)
rename benchmark/{results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log => results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log} (79%)
rename benchmark/results/{gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log => gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log} (68%)
delete mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log
delete mode 100644 benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
delete mode 100644 benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
delete mode 100644 benchmark/results/run_benchmarks.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
delete mode 100644 benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
delete mode 100644 benchmark/results_08-08-2025/run_benchmarks.log
diff --git a/README.md b/README.md
index f670a3f..115e0a0 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,6 @@ You can check the containers on DockerHub: https://hub.docker.com/r/kyuz0/amd-st
| `rocm-6.4.2-rocwmma` | ROCm 6.4.2 (HIP) + ROCWMMA | ROCm with ROCWMMA enabled for improved flash attention on RDNA3+/CDNA. |
| `rocm-6.4.3` | ROCm 6.4.3 (HIP) + hipBLASLt* | Latest stable ROCm. Great for BF16 models. Occasional crashes possible. |
| `rocm-6.4.3-rocwmma` | ROCm 6.4.3 (HIP) + ROCWMMA + hipBLASLt* | ROCm with ROCWMMA enabled for improved flash attention on RDNA3+/CDNA. |
-| `rocm-7beta` | ROCm 7.0 Beta (HIP) + hipBLASLt* | Latest ROCm beta. No real gain for Llama.cpp. Same model limits as 6.4.2. |
| `rocm-7rc` | ROCm 7.0 RC (HIP) + hipBLASLt* | Release candidate for ROCm 7.0. Same behavior as beta. |
| `rocm-7rc-rocwmma` | ROCm 7.0 RC (HIP) + ROCWMMA + hipBLASLt* | Release candidate for ROCm 7.0, with hipBLASLt and ROCWMMA for improved flash attention on RDNA3+/CDNA |
diff --git a/benchmark/generate_results.json.py b/benchmark/generate_results.json.py
index 4a0937a..c01279c 100644
--- a/benchmark/generate_results.json.py
+++ b/benchmark/generate_results.json.py
@@ -235,7 +235,7 @@ for path in sorted(glob.glob(os.path.join(RESULTS_DIR, "*.log"))):
# Meta
meta = {
"generated_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
- "os_kernel": "Fedora 42 — Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)",
+ "os_kernel": "Fedora 42 — Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)",
"llamacpp_builds": [{"hash": h, "number": n} for (h, n) in sorted(builds)],
"environments": sorted(envs),
"notes": "pp512 = prompt processing; tg128 = text generation; t/s = tokens/second",
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
index f7d6678..3e888bb 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x19cb8050) reason :GPU Hang
+Memory access fault by GPU node-1 (Agent handle: 0x275a2540) on address 0x7f3fb2c08000. Reason: Page not present or supervisor privilege.
✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index b800555..694fdbe 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 139.31 ± 0.13 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.97 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x25d19540) reason :GPU Hang
+✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
index 379088a..d19e880 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.07 ± 0.32 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.48 ± 0.01 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 131.14 ± 0.28 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.15 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
index 8df6842..e6a5d48 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x50e2050) reason :GPU Hang
-✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 104.12 ± 0.05 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.35 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..7465f25
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.62 ± 0.10 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.95 ± 0.02 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 68%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index 94079a7..f8e8b6b 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 103.63 ± 0.10 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.09 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 135.10 ± 0.35 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.14 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
index e1a550e..fad8a13 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 129.88 ± 0.57 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.43 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 130.99 ± 0.36 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 68%
rename from benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
index d5b577e..d4132eb 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.80 ± 0.14 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.13 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 140.15 ± 0.41 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.15 ± 0.01 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
new file mode 100644
index 0000000..b84b584
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 126.66 ± 0.22 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.14 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
new file mode 100644
index 0000000..350aa44
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 100.20 ± 0.13 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.30 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
similarity index 63%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
index d97d416..ec3889b 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2edd2a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x2624d340) reason :GPU Hang
+✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..2d21418
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x37c5d340) on address 0x7f2e3516f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 00fe3c5..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.50 ± 0.25 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 20.02 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 305470f..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 130.22 ± 0.35 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.00 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 7cca3a8..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1f3f20c0) reason :GPU Hang
-✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
index a945543..d99dfb8 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.16 ± 0.21 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.96 ± 0.01 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.48 ± 0.53 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index d65e6e5..b74a931 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 133.91 ± 0.57 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 19.94 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.27 ± 0.47 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.86 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index 10c8b0a..906c8ff 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.49 ± 0.48 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.95 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 158.54 ± 0.42 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.11 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index 1d0ab1d..d51baac 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 138.34 ± 0.27 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.90 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 166.11 ± 0.32 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 19.83 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
index 9927f0b..3c47854 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.65 ± 0.23 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.91 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 89.60 ± 0.20 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 20.22 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
index 86f99ad..bf63f0d 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.90 ± 0.22 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 20.15 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 64.66 ± 0.16 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.35 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
index 441e956..a3b2c08 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 129.49 ± 0.14 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 19.88 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x1d380ea0) reason :GPU Hang
+✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
index 28b4354..9cc7929 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 103.73 ± 0.14 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 20.07 ± 0.00 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0x4a0fea0) on address 0x7f3bf796f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
index 15c9127..d8fa4d5 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 201.03 ± 0.31 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.82 ± 0.00 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 197.95 ± 0.29 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.24 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
index c0e6775..eece528 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.89 ± 0.37 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.85 ± 0.01 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 199.40 ± 0.35 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.26 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
index c38f2a1..5b8bc47 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 128.01 ± 0.31 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.92 ± 0.01 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.28 ± 0.17 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 23.33 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
index 12bf239..41d8077 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.56 ± 0.31 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.64 ± 0.32 |
+| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 23.88 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
index deada9f..3ee3c3e 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 124.75 ± 0.42 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.43 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x3e28b540) reason :GPU Hang
+✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
index 6e7bcaa..2b15919 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2d9b050) reason :GPU Hang
+Memory access fault by GPU node-1 (Agent handle: 0x2bdf8540) on address 0x7f5f95e35000. Reason: Page not present or supervisor privilege.
✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
index 685d734..63bd38e 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.94 ± 0.42 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.35 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x3ff2d540) reason :GPU Hang
+✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
index b9a03bd..18f04dd 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 100.41 ± 0.16 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.53 ± 0.01 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x3bb3540) reason :GPU Hang
+✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
similarity index 54%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
index 5dc10c6..f5f0ad3 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 113.62 ± 0.21 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.04 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 121.82 ± 0.35 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.59 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
similarity index 69%
rename from benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
index 1208365..2bfbda7 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 90.24 ± 0.13 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.55 ± 0.04 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 126.60 ± 0.30 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.62 ± 0.04 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
new file mode 100644
index 0000000..159477b
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x26e36340) on address 0x7fcef3635000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
new file mode 100644
index 0000000..f625092
--- /dev/null
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x35263340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log
similarity index 54%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log
index d1de7a1..8ac4440 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.82 ± 0.18 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.35 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 117.95 ± 0.30 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.65 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log
similarity index 54%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log
index 5ed10e0..581bc16 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x1527fa90) on address 0x7f55d5f6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
+Memory access fault by GPU node-1 (Agent handle: 0x28aa3340) on address 0x7fb93761b000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
similarity index 58%
rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
index d800da3..486bf8f 100644
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3e596050) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x14d05340) reason :GPU Hang
+✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
similarity index 57%
rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
index c119ae0..fb08717 100644
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2dab2050) reason :GPU Hang
-✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x265e8340) reason :GPU Hang
+✖ ! [rocm6_4_3] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index d839f3e..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.61 ± 0.54 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.51 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log
deleted file mode 100644
index 8a0ff5a..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 123.75 ± 0.39 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.48 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 0aab0d1..0000000
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x36bce0c0) on address 0x7f6ee1f6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
index 55995b2..8ca5c1e 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.92 ± 0.39 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.47 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.19 ± 0.20 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.64 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
index 161145c..6eaa1df 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 127.14 ± 0.27 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.47 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 114.61 ± 0.20 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.51 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
index 5e8d103..5fb6167 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 120.88 ± 0.92 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.61 ± 0.09 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
index d77ab42..ec552ad 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 150.07 ± 0.56 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.52 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
index 97c3a25..dc5e6ec 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 118.52 ± 0.35 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.52 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 69.52 ± 0.17 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.63 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
index c6194e3..c1980f8 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | pp512 | 97.36 ± 0.07 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 1 | 0 | tg128 | 15.57 ± 0.02 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | pp512 | 74.02 ± 0.13 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 1 | 0 | tg128 | 15.73 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log
index 7f9bb58..350f64d 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | pp512 | 142.67 ± 0.75 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 99 | 0 | tg128 | 15.68 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
index 8e70bdf..6a1fdfd 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x1c536ea0) on address 0x7f623b57e000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
index f5209b4..1a18e9c 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.59 ± 0.50 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.51 ± 0.01 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 219.81 ± 0.70 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 16.80 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
index 3ba2fc2..bc34d0a 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 225.75 ± 0.69 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.53 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 222.20 ± 0.63 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 16.82 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
index c01b8a2..1c621cf 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.35 ± 0.43 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.80 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | pp512 | 126.55 ± 0.40 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 0 | tg128 | 17.07 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
index 3647c19..3dbeebf 100644
--- a/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
+++ b/benchmark/results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.91 ± 0.42 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 17.02 ± 0.00 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | pp512 | 131.25 ± 0.50 |
+| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 99 | 1 | 0 | tg128 | 17.31 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
deleted file mode 100644
index 49b6a40..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0xae0b050) on address 0x7f17943a9000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 820c8ea..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 108.88 ± 0.21 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index ecdf26e..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x1f7690e0) on address 0x7f6093d6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 9bbdd27..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2ae290c0) reason :GPU Hang
-✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 3a354c6..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x19f880e0) reason :GPU Hang
-✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
deleted file mode 100644
index 01916bd..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 109.02 ± 0.07 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
deleted file mode 100644
index 4d3f05b..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 117.34 ± 0.09 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
deleted file mode 100644
index 08bd375..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
deleted file mode 100644
index 3d74d91..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index 343727e..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 109.17 ± 0.12 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.65 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index d9b5fe6..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
deleted file mode 100644
index fac1830..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
deleted file mode 100644
index f08d646..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 4581b23..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2491416576 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 8835330..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2491416576 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index f0955ad..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.54 ± 0.14 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.67 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index 0c2bb42..0000000
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.12 ± 0.08 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.67 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
index 5c9071b..4fb737a 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xd004050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x33b8a540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index 182cfd1..8ed4e21 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1fdc2050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x20e35540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
index 1e5d45d..8ad5ab6 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.28 ± 0.05 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x1b1ea540) reason :GPU Hang
+✖ ! [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
index db68588..7860063 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 30.88 ± 0.02 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 16.16 ± 0.02 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
similarity index 54%
rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
index 409a36b..769f2f3 100644
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.32 ± 0.04 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 98.02 ± 0.18 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index 952dddb..4451e68 100644
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.28 ± 0.02 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 101.83 ± 0.11 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
new file mode 100644
index 0000000..2553f31
--- /dev/null
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x21da1340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
new file mode 100644
index 0000000..1a88dcd
--- /dev/null
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x15ac2340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log
new file mode 100644
index 0000000..56497ee
--- /dev/null
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.13 ± 0.17 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log
similarity index 62%
rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
rename to benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log
index cc4c84c..08a5922 100644
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.69 ± 0.04 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.62 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 80.42 ± 0.08 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
new file mode 100644
index 0000000..f5b7147
--- /dev/null
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x2c1e5340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..013fc7c
--- /dev/null
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x3e536340) on address 0x7f9182f6f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 1275625..0000000
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.65 ± 0.23 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.74 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index d4fb01f..0000000
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x2e9460f0) on address 0x7f23cf58a000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 256df75..0000000
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2c3170e0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 51e9900..0000000
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0xe3f70e0) on address 0x7f4e23b6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
index 2d8ff98..62ab0f7 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.63 ± 0.19 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.73 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 97.31 ± 0.20 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index 78ef763..1e31a60 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 103.15 ± 0.13 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 100.85 ± 0.13 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index 8abfc94..c5612f6 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 93.00 ± 0.22 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index b85ea71..e557123 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 97.88 ± 0.09 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
index 7715818..7c6d5fd 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 95.15 ± 0.14 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 2.74 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 99.41 ± 0.36 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.77 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
index a10eecc..f65845d 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-:0:rocdevice.cpp :3594: 448132897452 us: Callback: Queue 0x7f7ecc400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016
+Memory access fault by GPU node-1 (Agent handle: 0x1f66bec0) on address 0x7f3e84b6f000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
index e557f4b..44b15c4 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 94.06 ± 0.09 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 2.78 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
index f3c72e9..970522b 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 30.04 ± 0.04 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 2.74 ± 0.00 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0xac09ec0) on address 0x7f283f56f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
index ac2d0df..13252f9 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.20 ± 0.18 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.75 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 98.03 ± 0.24 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
index 9e22472..5781898 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.14 ± 0.35 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.74 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 99.12 ± 0.25 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.77 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
index b4da67c..9500e62 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 79.91 ± 0.16 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.75 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | pp512 | 75.59 ± 0.28 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 0 | tg128 | 2.78 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
index 5e3f60f..6c47ac0 100644
--- a/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.40 ± 0.16 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.75 ± 0.00 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | pp512 | 80.09 ± 0.38 |
+| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 99 | 1 | 0 | tg128 | 2.78 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
index b5a7155..6babfa8 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 134.21 ± 0.58 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.43 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x344ea540) reason :GPU Hang
+✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index 3a30aaf..2f3524f 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x10997050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0xe316540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
index f81dd87..1009e19 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 133.77 ± 0.46 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.30 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x17ade540) reason :GPU Hang
+✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
index b6d7516..c7625db 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x1732e050) on address 0x7fcb1a36f000. Reason: Page not present or supervisor privilege.
+HW Exception by GPU node-1 (Agent handle: 0xe91f540) reason :GPU Hang
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..ebcf552
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -0,0 +1,11 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+Memory access fault by GPU node-1 (Agent handle: 0x1a840340) on address 0x7f3babb56000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 68%
rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index 6375788..ab25429 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 103.96 ± 0.18 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.47 ± 0.02 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 291.08 ± 1.26 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.53 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
index f7132fb..65957d1 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 134.39 ± 0.32 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.33 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 134.19 ± 1.49 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.56 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
new file mode 100644
index 0000000..dcff3bd
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x1de78340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log
new file mode 100644
index 0000000..f75714d
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.28 ± 1.29 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.58 ± 0.03 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log
similarity index 53%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log
index 53feea1..7c8dc76 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3b11ea90) reason :GPU Hang
-✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
+Memory access fault by GPU node-1 (Agent handle: 0x2162b340) on address 0x7f500556f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log
similarity index 56%
rename from benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log
index d044208..96e7a94 100644
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x432ea90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0xdacf340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..65a2e33
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x3dc00340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index e2f4dbe..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x225860e0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 311e082..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-:0:rocdevice.cpp :3675: 454572762136 us: Callback: Queue 0x7fb3f1400000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 7188df3..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x11dec0e0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log
index be95461..64b52d5 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 273.64 ± 0.59 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 |
-
-build: 34c9d765 (6122)
+HW Exception by GPU node-1 (Agent handle: 0x3882bf60) reason :GPU Hang
+✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index 70811e5..11cbda1 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 293.87 ± 1.35 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 14.31 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.84 ± 9.41 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.37 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index fa50f09..7575a7e 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 273.97 ± 1.67 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.05 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index e9eb9a3..647f737 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 285.26 ± 1.79 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 14.33 ± 0.03 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
index 07c550f..6ac1b55 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 269.30 ± 1.99 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 276.37 ± 1.65 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.57 ± 0.04 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
index 0a2d01d..bc46574 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 225.70 ± 1.00 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 14.46 ± 0.00 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0xa893ec0) on address 0x7f070a3a9000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log
index c9103d5..ca30067 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.16 ± 0.44 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.41 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 269.17 ± 0.99 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 14.63 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log
index 560fe07..56c35f6 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x1db86ec0) on address 0x7f2273f6f000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
index 3cc2007..8c8e292 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.54 ± 1.24 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.34 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 242.07 ± 1.05 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.56 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
index 7dafa9d..71556fe 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 246.48 ± 1.35 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.09 ± 0.01 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 244.49 ± 1.13 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.33 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
index 80c940d..33a8b80 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 147.36 ± 0.80 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.30 ± 0.01 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 147.08 ± 0.98 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 15.50 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
index e72dffe..caf7973 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 150.06 ± 1.13 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.27 ± 0.00 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 149.97 ± 1.10 |
+| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 15.49 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
index 0910303..a8331aa 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 135.23 ± 0.81 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.62 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x1019d540) reason :GPU Hang
+✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
index 07a24ef..b68e909 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xf461050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x2ff5c540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
index db50520..88fb55c 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.29 ± 0.58 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.60 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x3db80540) reason :GPU Hang
+✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
index 4d84455..befc174 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x13dd2050) on address 0x7f6913b6f000. Reason: Page not present or supervisor privilege.
+HW Exception by GPU node-1 (Agent handle: 0x24a4c540) reason :GPU Hang
✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log
similarity index 54%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log
index 8678b7b..40cf34f 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.44 ± 0.76 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.61 ± 0.00 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 270.35 ± 3.39 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.78 ± 0.03 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
similarity index 61%
rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
index 911ffe7..4a19b96 100644
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 153.97 ± 1.90 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.98 ± 0.01 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 292.23 ± 3.13 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.73 ± 0.03 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
similarity index 55%
rename from benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
index 1849a77..65afec2 100644
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x64dea90) reason :GPU Hang
-✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x5f69340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 62%
rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
index bb5ac9d..98dc8c5 100644
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 99 | 1 | 0 | pp512 | 33.87 ± 0.05 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 99 | 1 | 0 | tg128 | 2.64 ± 0.00 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 140.27 ± 0.97 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.74 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log
new file mode 100644
index 0000000..8ac0514
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log
@@ -0,0 +1,11 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+HW Exception by GPU node-1 (Agent handle: 0x2079b340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log
similarity index 53%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log
index 099d9b2..8d6068b 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2fba3a90) reason :GPU Hang
-✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
+Memory access fault by GPU node-1 (Agent handle: 0x37ff7340) on address 0x7fa76bba9000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log
new file mode 100644
index 0000000..2add86a
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x2a344340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..977948c
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x27934340) on address 0x7f656656f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index 1441b69..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 262.13 ± 9.71 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.65 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index 4dc7de6..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x2b4130e0) on address 0x7f8a7ed6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log
deleted file mode 100644
index 236d063..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x12790e0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 3db64ad..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x14e4a0e0) on address 0x7f859916f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log
index 930340d..03112eb 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 267.45 ± 1.90 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.60 ± 0.05 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 279.13 ± 2.90 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.07 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log
index 1ee598c..182bf9d 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 293.37 ± 7.08 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 11.54 ± 0.03 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 293.60 ± 3.84 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.62 ± 0.02 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
index b62035e..66c4a41 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 264.02 ± 2.74 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.79 ± 0.06 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
index f49c465..5439b6b 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 279.69 ± 2.30 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 11.60 ± 0.04 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
index 8f33074..f3af1c9 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 272.38 ± 1.28 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 11.64 ± 0.01 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 252.38 ± 7.70 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.35 ± 0.60 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
index 3ccfa82..9932c99 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x2e56aec0) on address 0x7f4102f6f000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log
index 2758045..d31f283 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 271.54 ± 4.10 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 11.57 ± 0.58 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log
index 0ab337b..986706b 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x1aa83ec0) on address 0x7f9f1e96f000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
index 40cd552..089be05 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 255.55 ± 1.38 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.27 ± 0.01 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 258.54 ± 1.39 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
index e8041dc..42f4672 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 259.07 ± 1.30 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.11 ± 0.01 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 262.84 ± 1.39 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.30 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
index e52091e..e7df5fa 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.01 ± 0.85 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.30 ± 0.00 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 169.23 ± 0.84 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 12.45 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
index 154221d..2776458 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.71 ± 0.91 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.28 ± 0.00 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 173.79 ± 0.85 |
+| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.44 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
index f13eb82..90103a9 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.82 ± 0.73 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.41 ± 0.00 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0x3e5ce540) on address 0x7f64d3b76000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index 5cd6f40..2e11ead 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1624d050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x1239e540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
index d0be2b8..b311bf5 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 137.63 ± 0.80 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.29 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x101f4540) reason :GPU Hang
+✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
index e7a2f72..8ac1834 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 122.98 ± 0.59 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.53 ± 0.00 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0x15f12540) on address 0x7ef17d976000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..f500cd5
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.51 ± 1.64 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 67%
rename from benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index c46ae93..85f9bfb 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 233.14 ± 0.90 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.59 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 308.62 ± 2.62 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.54 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
similarity index 78%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
index 3ec496d..78e4255 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.27 ± 0.66 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.40 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 137.71 ± 0.62 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.71 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 68%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
index bde171a..8945a72 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.58 ± 0.18 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.55 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 142.62 ± 0.82 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.55 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
new file mode 100644
index 0000000..5aa96bf
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 286.37 ± 1.44 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.70 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
similarity index 67%
rename from benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
index ee0d484..98b05eb 100644
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 123.61 ± 0.50 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.60 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 234.68 ± 1.31 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.71 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
similarity index 55%
rename from benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
index 2da2c5e..186cdf8 100644
--- a/benchmark/results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xd98d050) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0xa3c7340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..19b75a4
--- /dev/null
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x118c6340) reason :GPU Hang
+✖ ! [rocm6_4_3] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index cc14e7d..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 281.87 ± 1.98 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.59 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 05da15f..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2334b0e0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 20fd2ca..0000000
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1b1f20f0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
index 60b7302..54d6795 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 290.54 ± 1.59 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.67 ± 0.01 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index 4935293..6f81260 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | pp512 | 307.08 ± 2.67 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 1 | 0 | tg128 | 17.34 ± 0.01 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 304.99 ± 0.37 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.28 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index ac13496..721ffc5 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 138.22 ± 0.46 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.45 ± 0.09 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 283.93 ± 1.57 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.65 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index 12bb02b..241fb21 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 300.13 ± 1.26 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.27 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
index 1e4897a..19895f7 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 281.24 ± 1.95 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.56 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 291.60 ± 1.95 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.73 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
index 9eb1c08..d607e30 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x134adec0) on address 0x7f0318984000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
index ca4dda3..a0c7756 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | pp512 | 285.56 ± 1.41 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 0 | tg128 | 17.72 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
index 9086eec..ae6829f 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | pp512 | 227.75 ± 1.52 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 99 | 1 | 0 | tg128 | 17.73 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
index f03b1f2..f89d42c 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.27 ± 0.80 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.09 ± 0.01 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 216.64 ± 2.76 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.39 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
index 2706deb..7ee60d9 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 220.73 ± 0.69 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.64 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 217.68 ± 4.15 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 19.97 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
index 8058f91..002154b 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.77 ± 0.73 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.02 ± 0.01 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | pp512 | 151.98 ± 0.60 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 0 | tg128 | 20.26 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
index 953d42a..64d4625 100644
--- a/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.24 ± 1.01 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.99 ± 0.00 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | pp512 | 154.96 ± 0.82 |
+| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 99 | 1 | 0 | tg128 | 20.28 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
index 24b7806..1adbae9 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3eeda050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x2f5d1540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
index c0f73f3..9a061f5 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2d723050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0xdc93540) reason :GPU Hang
✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
index 95b0795..a1be58c 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 73.83 ± 0.16 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.68 ± 0.01 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0xff7540) reason :GPU Hang
+✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
index 22c3d0b..281a126 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 61.47 ± 0.09 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.83 ± 0.00 |
-
-build: 79c1160b (6123)
+HW Exception by GPU node-1 (Agent handle: 0x2607e540) reason :GPU Hang
+✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
similarity index 54%
rename from benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
index 1bc098e..d927ed6 100644
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 74.15 ± 0.18 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.73 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 130.11 ± 0.68 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.95 ± 0.04 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
new file mode 100644
index 0000000..4638f26
--- /dev/null
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
@@ -0,0 +1,10 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 144.31 ± 0.80 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.71 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
similarity index 54%
rename from benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
index a418f5b..d8218bd 100644
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xcd80a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x8063340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
new file mode 100644
index 0000000..3a247e3
--- /dev/null
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x18398340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log
new file mode 100644
index 0000000..82caf25
--- /dev/null
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 131.78 ± 1.03 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.68 ± 0.43 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log
similarity index 53%
rename from benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
rename to benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log
index 40b3223..a0ed178 100644
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x25011a90) on address 0x7fdcc1b6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
+Memory access fault by GPU node-1 (Agent handle: 0x50aa340) on address 0x7f7365ba9000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
new file mode 100644
index 0000000..3767019
--- /dev/null
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+HW Exception by GPU node-1 (Agent handle: 0x1990d340) reason :GPU Hang
+✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
new file mode 100644
index 0000000..f1dc1f5
--- /dev/null
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log
@@ -0,0 +1,6 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x180d4340) on address 0x7f11c8f6f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm6_4_3] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index c4bfe32..0000000
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x359cb0e0) reason :GPU Hang
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index 4707f93..0000000
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,7 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-:0:rocdevice.cpp :3675: 456558403486 us: Callback: Queue 0x7f04ef600000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016
-Memory access fault by GPU node-1 (Agent handle: 0x2e8f0f0) on address 0x7eeca7f6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log
deleted file mode 100644
index 4554363..0000000
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1c2260e0) reason :GPU Hang
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index d31397b..0000000
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x11f900f0) on address 0x7f6f91d6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
index 440a82e..38826b0 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 129.70 ± 0.81 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.66 ± 0.00 |
-
-build: 34c9d765 (6122)
+Memory access fault by GPU node-1 (Agent handle: 0x3c89ef80) on address 0x7f777640a000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
index 15a84cb..a74258c 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 145.18 ± 0.48 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.43 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 141.61 ± 0.92 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.34 ± 0.02 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
index 7597916..966ba3e 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 133.33 ± 0.68 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.78 ± 0.04 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
index af8634b..22a4e47 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | pp512 | 139.60 ± 0.47 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 1 | 0 | tg128 | 13.03 ± 0.57 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
index d8318fb..daaa296 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | pp512 | 130.56 ± 0.46 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.02 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 134.95 ± 0.76 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.99 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
index 3d86ec7..0d3acaf 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
@@ -2,9 +2,6 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | pp512 | 97.08 ± 0.34 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 999 | 1 | 0 | tg128 | 13.90 ± 0.03 |
-
-build: 79c1160b (6123)
+:0:rocdevice.cpp :3675: 29915649820 us: Callback: Queue 0x7f500c700000 aborting with error : HSA_STATUS_ERROR_EXCEPTION: An HSAIL operation resulted in a hardware exception. code: 0x1016
+Memory access fault by GPU node-1 (Agent handle: 0x2aad9ec0) on address 0x7f37c576f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log
index 3cb748b..8135bc1 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0 failed (exit 134)
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | pp512 | 135.29 ± 0.51 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | ROCm | 99 | 0 | tg128 | 13.97 ± 0.04 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
index 443bad3..fc4a37f 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x122f2ec0) on address 0x7f10537a9000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
index b14c784..ec978bb 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.76 ± 0.62 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.06 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 112.93 ± 0.63 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 16.43 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
index c01f816..e560c0b 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.18 ± 0.67 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.90 ± 0.01 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 114.35 ± 1.12 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 16.27 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
index 077cf15..5fbf704 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.79 ± 0.39 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.61 ± 0.00 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | pp512 | 64.60 ± 0.38 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 0 | tg128 | 17.03 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
index d9c6cb3..e878778 100644
--- a/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
+++ b/benchmark/results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.84 ± 0.42 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.86 ± 0.01 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | pp512 | 66.60 ± 0.42 |
+| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 99 | 1 | 0 | tg128 | 17.28 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
index 562318d..4f8da20 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.78 ± 2.71 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.56 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.75 ± 2.58 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.62 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index 77ad01b..598f905 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 161.64 ± 2.99 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.94 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 161.90 ± 3.05 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.09 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
index 9a6adc0..ae13cea 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.64 ± 2.49 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.93 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 157.81 ± 2.51 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.61 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
index 000d477..2791323 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 1.99 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.32 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 140.24 ± 1.86 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.46 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
similarity index 54%
rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
index 3e8cb4e..cd9f858 100644
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 150.37 ± 1.75 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.49 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 438.42 ± 4.14 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.57 ± 0.01 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index af7dc3f..3adfafe 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 162.19 ± 3.06 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.03 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 475.43 ± 7.40 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.08 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
index 3c0cef6..96ee7c7 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.95 ± 2.63 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 158.13 ± 2.40 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
index 86ac559..e330db0 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 140.32 ± 2.10 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 163.40 ± 3.21 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.14 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log
similarity index 54%
rename from benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log
index dc8823d..f983fbc 100644
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log
@@ -2,9 +2,14 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.06 ± 1.71 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.13 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 441.36 ± 3.35 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.60 ± 0.01 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log
index ded0220..04c9b9c 100644
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 154.09 ± 1.98 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.02 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 337.36 ± 3.48 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.45 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log
index 03365ca..c1ff6cb 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 157.69 ± 2.52 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 23.89 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 161.73 ± 1.23 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.58 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
index 207a2a1..aaa3b75 100644
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 134.40 ± 1.47 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.32 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 143.05 ± 2.10 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.42 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 6b685b0..0000000
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 424.74 ± 7.06 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.48 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 2940f7a..0000000
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x16acc0c0) on address 0x7f24fed6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index b98ad69..0000000
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 154.45 ± 1.39 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.52 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 8773673..0000000
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 138.46 ± 1.64 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.29 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log
index 5088e35..7f744f7 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 425.56 ± 3.28 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.80 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 448.63 ± 5.90 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.96 ± 0.02 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index 20a8ebc..61f996e 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 472.05 ± 4.59 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.12 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 473.34 ± 8.60 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.99 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index 6fb55b7..8b322b7 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 153.54 ± 2.25 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.74 ± 0.01 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 338.07 ± 3.03 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.93 ± 0.03 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index e0dd74e..a3675a1 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 158.20 ± 2.47 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.12 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 342.57 ± 3.12 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 23.97 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
index 1c911fe..c7f3224 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 426.72 ± 7.55 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.57 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 444.30 ± 6.78 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.66 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
index d2a18a1..8384bda 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
@@ -2,4 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+Memory access fault by GPU node-1 (Agent handle: 0x38fecea0) on address 0x7f31ea76f000. Reason: Page not present or supervisor privilege.
✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log
index 25f3bf6..aab676c 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 153.89 ± 1.73 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.57 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 333.42 ± 6.83 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.69 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
index 53b2312..dba441c 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,9 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 137.06 ± 2.00 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.32 ± 0.01 |
-
-build: 79c1160b (6123)
+Memory access fault by GPU node-1 (Agent handle: 0x1f121ea0) on address 0x7fd78e16f000. Reason: Page not present or supervisor privilege.
+✖ ! [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
index fc1a60f..67aa5d3 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.55 ± 0.11 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.09 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 106.47 ± 0.10 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 8.18 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
index f84135e..de3ec24 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.68 ± 0.13 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 8.03 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 106.77 ± 0.12 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 8.11 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
index 1458372..7ea35f3 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 86.02 ± 0.11 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.46 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 84.71 ± 0.11 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 7.52 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
index ec2a50a..2aa8bc0 100644
--- a/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.93 ± 0.15 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.44 ± 0.00 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 85.70 ± 0.10 |
+| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 7.52 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
index 015e9b4..14182c9 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.45 ± 1.17 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.42 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.23 ± 0.82 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.64 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
index a5d6d7e..63e83fb 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 411.60 ± 0.78 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.14 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 411.72 ± 1.04 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.78 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
index 4565d4e..f33f7c4 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 385.52 ± 0.67 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.06 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.86 ± 1.41 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.65 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
index a0064ea..928cc4b 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 300.86 ± 0.38 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.71 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 301.23 ± 0.49 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.07 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..7499112
--- /dev/null
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 564.83 ± 6.58 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.68 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log
index 60992bd..8947515 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 412.35 ± 1.06 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 48.26 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 624.99 ± 3.81 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.64 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log
index dc6f1a9..1488828 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.77 ± 0.97 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 389.25 ± 2.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.66 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
index 2a04531..d3d972e 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 301.29 ± 0.54 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.58 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 412.18 ± 1.15 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 48.80 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log
new file mode 100644
index 0000000..bb39f34
--- /dev/null
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 562.86 ± 10.14 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.74 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log
similarity index 68%
rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log
index bbdc595..e501cb6 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 411.72 ± 2.56 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.76 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 418.07 ± 1.65 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.11 ± 0.01 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log
index bd9bc1c..87b8aec 100644
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.72 ± 2.63 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.74 ± 1.70 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.65 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log
similarity index 68%
rename from benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log
index 6a605f7..17416ee 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 300.58 ± 1.17 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 49.78 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 301.31 ± 0.65 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.37 ± 0.02 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
deleted file mode 100644
index 7b4fa67..0000000
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 534.84 ± 2.48 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.21 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log
deleted file mode 100644
index f59b880..0000000
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.34 ± 1.49 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.23 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log
index bd849a1..444ca09 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 535.44 ± 6.90 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.07 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 570.31 ± 5.05 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.52 ± 0.02 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log
index 57f3363..c11bc62 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 619.02 ± 7.73 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 47.63 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 612.79 ± 4.77 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log
index 922286f..e17b52a 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 387.98 ± 0.76 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.09 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 572.09 ± 8.22 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.45 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
index a66f360..19e8321 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 413.28 ± 2.05 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 47.63 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 605.49 ± 1.47 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 46.73 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
index 625d68e..f2e3ece 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 540.14 ± 5.22 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.65 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 573.05 ± 6.77 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.80 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
index 43c8e66..3fa7435 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 418.60 ± 2.58 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.63 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 416.05 ± 3.44 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.33 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log
index 74fbd99..db8de56 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 386.87 ± 1.67 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.50 ± 0.01 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 576.38 ± 3.91 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 50.85 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log
index 763fdaf..f19e470 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 300.40 ± 1.44 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 49.69 ± 0.00 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 414.62 ± 3.23 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 50.22 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
index 4f467f9..10609cc 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 741.97 ± 2.92 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 57.22 ± 0.02 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 733.40 ± 2.59 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 59.36 ± 0.05 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
index 8cbd25b..486113e 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 731.64 ± 2.80 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 53.53 ± 0.02 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 725.54 ± 2.84 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 55.57 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
index 993ae07..00cd713 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 396.38 ± 1.53 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 59.54 ± 0.02 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | pp512 | 392.54 ± 1.80 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 0 | tg128 | 61.56 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
index 296a137..3123ba1 100644
--- a/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
+++ b/benchmark/results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 406.84 ± 1.62 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 58.50 ± 0.10 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | pp512 | 403.74 ± 1.69 |
+| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 99 | 1 | 0 | tg128 | 60.57 ± 0.08 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
deleted file mode 100644
index d63b9d0..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x168bc050) on address 0x7ef358d6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 4d0291a..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 408.29 ± 1.82 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.53 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 0830d17..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0xf2660e0) on address 0x7fb2199a9000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index eb240a1..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.29 ± 1.91 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.53 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index d7843f5..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 130.39 ± 1.57 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.31 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log
deleted file mode 100644
index 0a1bfa8..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 414.47 ± 3.10 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.61 ± 0.01 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
deleted file mode 100644
index 8c77605..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 460.12 ± 5.58 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.02 ± 0.01 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
deleted file mode 100644
index 6c331f6..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.43 ± 1.04 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.80 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
deleted file mode 100644
index b9050d5..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 150.58 ± 1.93 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.13 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index 04d0c86..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 413.05 ± 2.36 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.15 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index c7a5573..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 325.48 ± 1.77 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.31 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log
deleted file mode 100644
index c900ebe..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | pp512 | 145.83 ± 2.39 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 0 | tg128 | 24.12 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
deleted file mode 100644
index a409195..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | pp512 | 130.20 ± 1.39 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 99 | 1 | 0 | tg128 | 24.35 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 6c09327..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.16 ± 0.06 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.08 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 5f7c40c..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.26 ± 0.11 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 8.04 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index a65273a..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.88 ± 0.10 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.48 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index a14f281..0000000
--- a/benchmark/results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.57 ± 0.11 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.49 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
index 4638587..a6af248 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 192.14 ± 0.71 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 10.75 ± 3.44 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.91 ± 0.21 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
index 2b45f78..333ac47 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 229.77 ± 0.18 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.58 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 229.15 ± 0.24 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.76 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
index fe52481..f26f454 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.24 ± 0.39 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.59 ± 0.24 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
index 62f3c9e..df5dd02 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 201.58 ± 0.09 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.57 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 197.89 ± 3.40 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.76 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..ba675a0
--- /dev/null
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.26 ± 0.94 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log
index 348f5ed..a834fd9 100644
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 229.77 ± 0.32 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.59 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 820.41 ± 1.59 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log
index 9f1e992..3b668bb 100644
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 223.38 ± 0.29 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 213.40 ± 3.62 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
index de6f8de..ed14086 100644
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 202.13 ± 0.24 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.58 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 224.20 ± 4.73 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log
new file mode 100644
index 0000000..b6d34c8
--- /dev/null
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 734.70 ± 1.48 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.03 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log
similarity index 69%
rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log
index 214947a..a9a4641 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 567.65 ± 0.94 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.60 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 554.49 ± 0.62 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log
index 5872035..21731f6 100644
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.86 ± 0.11 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.85 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 220.22 ± 1.60 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.04 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log
similarity index 69%
rename from benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log
index 71ec3f9..ecb65ad 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 203.03 ± 0.17 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.58 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 193.90 ± 1.19 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.77 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
deleted file mode 100644
index c8ae56d..0000000
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 706.58 ± 0.96 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log
deleted file mode 100644
index 305aaa3..0000000
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.31 ± 0.28 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.88 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log
index 86106ee..f5c1c56 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 703.10 ± 0.68 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.83 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 751.04 ± 1.24 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.01 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log
index 3898840..c86174c 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 818.63 ± 0.82 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.47 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 811.04 ± 1.22 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log
index 2e84f94..3f70b79 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.39 ± 0.17 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.81 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.99 ± 1.44 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.00 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
index a31080a..4cc26aa 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 228.56 ± 0.31 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.51 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 794.90 ± 1.42 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.45 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
index b52b25c..301a2b5 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 706.92 ± 0.89 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.87 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 752.36 ± 0.48 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
index 228b25b..1ddb96e 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 554.98 ± 0.46 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.61 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 569.66 ± 0.60 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.78 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log
index 419821e..e22f092 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 222.26 ± 0.30 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 13.86 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | pp512 | 750.36 ± 1.88 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 0 | tg128 | 14.05 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log
index 8c0ebea..01fed10 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 201.53 ± 0.07 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.59 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | pp512 | 559.73 ± 0.51 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 99 | 1 | 0 | tg128 | 13.79 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
index 670f9fa..6d1ed7b 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 675.90 ± 1.28 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 14.26 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 680.44 ± 0.55 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.39 ± 0.03 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
index f680dfa..3c0a8e7 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 371.03 ± 0.33 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.49 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 371.66 ± 0.51 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 12.62 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
index 36cc6ea..5a3ee90 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 504.61 ± 2.97 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 14.05 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | pp512 | 502.88 ± 1.45 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 0 | tg128 | 14.21 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
index df5009a..8f4867f 100644
--- a/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
+++ b/benchmark/results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 495.37 ± 0.71 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 13.87 ± 0.00 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | pp512 | 496.33 ± 1.83 |
+| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 99 | 1 | 0 | tg128 | 14.02 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
index aab4706..0219357 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 92.82 ± 0.46 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.05 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 87.20 ± 3.70 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
index 43f28f1..8dcf6c9 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 94.62 ± 0.56 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 68.87 ± 14.37 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.08 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
index b7e4cd2..627bb9e 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 91.25 ± 0.44 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 82.57 ± 10.36 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
index da3d8bb..b35b468 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 84.81 ± 0.48 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 74.78 ± 10.12 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..523552b
--- /dev/null
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 395.28 ± 0.22 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.96 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
index d3b262b..f8b41b5 100644
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.75 ± 0.35 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 468.37 ± 1.54 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.08 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
index 96d541d..1ce39b8 100644
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 92.52 ± 0.44 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.05 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 79.42 ± 0.41 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
index a5e826d..29c9209 100644
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 94.54 ± 0.52 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 89.19 ± 0.53 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log
new file mode 100644
index 0000000..fcaf5b3
--- /dev/null
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 398.35 ± 1.07 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log
similarity index 69%
rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log
index 6b1dd72..bb05e2f 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 310.92 ± 0.73 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.05 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 302.82 ± 2.53 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log
similarity index 69%
rename from benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log
index aaaffba..f41ad73 100644
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 91.54 ± 0.50 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 59.13 ± 7.79 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
similarity index 69%
rename from benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
index 846a5fd..c07bd16 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 82.85 ± 0.49 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 61.26 ± 10.54 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 6b535e0..0000000
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 405.35 ± 0.62 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log
deleted file mode 100644
index 26890da..0000000
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 86.80 ± 0.36 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.02 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log
index 8b094b5..d48d219 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 404.79 ± 0.61 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 418.46 ± 0.10 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
index a690d20..ba0b9fa 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 472.91 ± 1.05 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 466.83 ± 1.65 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.07 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
index 6f151a6..f152fdb 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 91.08 ± 0.67 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.03 ± 0.01 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 454.10 ± 1.09 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
index 13775d4..a66ffac 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 93.26 ± 0.55 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 499.43 ± 1.24 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
index 1615077..adb03dd 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 368.33 ± 0.38 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 3.71 ± 0.01 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 392.50 ± 0.50 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 3.97 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
index 3803787..5ec86d3 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 311.83 ± 0.31 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 311.25 ± 0.72 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log
index eba50a2..bec2363 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 80.07 ± 0.21 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.00 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | pp512 | 451.69 ± 0.62 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 0 | tg128 | 4.09 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
index 4575c7f..3a00d5c 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | pp512 | 324.43 ± 0.22 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 99 | 1 | 0 | tg128 | 4.09 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
index d74242d..f20e9bc 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
@@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16:
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
ggml_vulkan: Device memory allocation of size 2819260416 failed.
ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
+main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
index a667917..2578dff 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
@@ -4,5 +4,5 @@ ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16:
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
ggml_vulkan: Device memory allocation of size 2819260416 failed.
ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
+main: error: failed to load model '/home/kyuz0/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
index 73c8358..9dfdc23 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | pp512 | 135.01 ± 0.28 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | pp512 | 129.49 ± 0.34 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 0 | tg128 | 4.06 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
index 8c6f730..96c0e93 100644
--- a/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ b/benchmark/results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | pp512 | 137.76 ± 0.25 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | tg128 | 4.03 ± 0.00 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | pp512 | 137.67 ± 1.25 |
+| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 99 | 1 | 0 | tg128 | 4.06 ± 0.00 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
index 43d8ffa..84fba20 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 727.59 ± 1.45 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.22 ± 0.03 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 728.70 ± 1.28 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.63 ± 0.03 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
index 5f4bc59..073d72d 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 750.30 ± 1.03 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.96 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 752.52 ± 0.83 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.93 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
index 8397b72..4ab8e49 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 728.24 ± 0.55 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 75.89 ± 0.03 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.33 ± 1.93 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.79 ± 0.03 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
index ac89e51..bdf1afb 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 643.29 ± 0.97 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.53 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 645.25 ± 0.89 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.31 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..4a3544a
--- /dev/null
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2033.46 ± 5.16 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.47 ± 0.26 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log
index 16dd036..78f4e0c 100644
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 752.25 ± 0.73 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.93 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2276.86 ± 9.60 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.26 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log
index 34e7e86..54a23d2 100644
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 729.91 ± 1.22 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.14 ± 0.03 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 727.18 ± 2.22 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.65 ± 0.74 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log
index 08d39fe..24c6a23 100644
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 645.88 ± 0.61 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.63 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 740.27 ± 10.38 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.76 ± 0.11 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log
new file mode 100644
index 0000000..e669ba8
--- /dev/null
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2035.38 ± 4.03 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.40 ± 0.80 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log
similarity index 69%
rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log
index 4c71363..1562460 100644
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 646.16 ± 0.39 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.53 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1515.55 ± 8.10 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.20 ± 0.39 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log
index f07fba3..b49eeff 100644
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.51 ± 1.49 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.35 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 714.75 ± 27.98 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 66.10 ± 5.25 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log
similarity index 68%
rename from benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
rename to benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log
index f67fdc0..f75c3de 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 1548.20 ± 4.48 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.64 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 596.86 ± 37.66 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 58.75 ± 3.09 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
deleted file mode 100644
index e24e049..0000000
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1812.73 ± 7.38 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.55 ± 0.02 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log
deleted file mode 100644
index 2791d8d..0000000
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.03 ± 0.75 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.59 ± 0.03 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index e88558b..0000000
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 651.26 ± 1.22 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.44 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log
index 502bbc1..c7f6ba3 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1799.45 ± 7.32 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 75.43 ± 0.03 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2014.60 ± 24.35 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 59.16 ± 3.76 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log
index c34e4a4..0862fc6 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 2267.56 ± 6.61 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 68.27 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2191.77 ± 78.21 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 54.32 ± 2.65 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log
index d86865b..4293b33 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.58 ± 0.87 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 75.48 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1991.71 ± 2.91 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 56.37 ± 3.40 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log
index 9ddd54a..1af2b23 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 750.44 ± 0.80 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 68.27 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 2096.22 ± 4.59 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 64.88 ± 0.05 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
index b4f3d2a..7e86d5a 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 1812.27 ± 4.63 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.22 ± 0.01 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 2027.41 ± 4.62 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.12 ± 0.03 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
index a78e906..a3497bf 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 1510.06 ± 4.96 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.58 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1550.55 ± 4.52 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.54 ± 0.06 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log
index e4cd337..51f19f4 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 729.81 ± 1.15 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 76.03 ± 0.04 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | pp512 | 1992.48 ± 7.34 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 0 | tg128 | 77.05 ± 0.03 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log
index a600775..a65b575 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 645.48 ± 1.40 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 69.67 ± 0.02 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | pp512 | 1474.15 ± 1.44 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 99 | 1 | 0 | tg128 | 70.44 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
index 8dafbf8..2624621 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1628.18 ± 1.73 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 84.23 ± 0.15 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1593.62 ± 2.90 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 85.26 ± 0.26 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
index fc50285..20e82ce 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 947.36 ± 1.47 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 60.35 ± 0.15 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 936.52 ± 2.35 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 60.89 ± 0.10 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
index 2ecedbd..6ba35a8 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1529.98 ± 0.80 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 86.95 ± 0.31 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | pp512 | 1515.05 ± 2.98 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 0 | tg128 | 87.54 ± 0.18 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
index 309d21b..a1a86a0 100644
--- a/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
+++ b/benchmark/results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 1498.81 ± 1.70 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 81.29 ± 0.12 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | pp512 | 1476.16 ± 5.12 |
+| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 99 | 1 | 0 | tg128 | 82.48 ± 0.36 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
index aa28166..d8257a4 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.66 ± 0.64 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.65 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.59 ± 0.86 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.97 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
index 11185b5..c765b22 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2ad71050) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 390.43 ± 0.70 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.81 ± 0.01 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log
index e00d36f..306797a 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 352.40 ± 1.12 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 31.99 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.94 ± 1.35 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.97 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log
index 5f9c27c..a49785b 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 321.54 ± 0.46 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.03 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 322.57 ± 0.31 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.30 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..2b2057d
--- /dev/null
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 622.16 ± 6.71 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log
index dbb739d..a8e0637 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 411.33 ± 1.01 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.50 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 743.09 ± 4.89 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.76 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log
index 2301d16..d94bfc6 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 355.01 ± 0.57 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.66 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.98 ± 0.72 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.86 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 69%
rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log
index 707b558..95ce008 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 249.65 ± 0.33 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.01 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 390.67 ± 0.97 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log
new file mode 100644
index 0000000..320bdde
--- /dev/null
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 617.00 ± 4.97 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.90 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log
index a62923c..aaf9547 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.95 ± 0.40 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 543.39 ± 5.51 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.28 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log
index fc1ded3..717bdb1 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.36 ± 0.53 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 31.90 ± 0.01 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.18 ± 0.29 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log
similarity index 69%
rename from benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log
rename to benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log
index 5a91196..a328319 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 548.27 ± 2.65 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.07 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 322.46 ± 0.46 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.33 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log
deleted file mode 100644
index 218c087..0000000
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 604.24 ± 4.34 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.69 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log
deleted file mode 100644
index 957c7c2..0000000
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 355.23 ± 1.71 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.66 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index b665daa..0000000
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 323.79 ± 0.87 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.04 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log
index f5db248..bfcbd06 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 592.27 ± 5.61 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.68 ± 0.02 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 643.61 ± 7.14 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.91 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log
index 60e0975..9e50477 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 735.02 ± 5.32 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.34 ± 0.01 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 736.33 ± 3.33 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.74 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log
index 7e97f90..a3eb53e 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.49 ± 1.71 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.63 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 651.63 ± 3.08 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.88 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log
index a07f108..422fcf2 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 388.50 ± 1.06 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.28 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 738.84 ± 9.12 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.79 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log
index c05f1a3..8fc2f66 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 598.68 ± 9.32 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.75 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 649.28 ± 0.87 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.99 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log
index f9a46a3..35770c3 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 546.30 ± 3.37 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.04 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.01 ± 3.85 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.38 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log
index 19aa96b..7ed9087 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.34 ± 0.67 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 33.76 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 659.79 ± 3.13 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 34.01 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log
index 2733c29..af54d9f 100644
--- a/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 324.26 ± 0.80 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.05 ± 0.00 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 553.65 ± 2.40 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 33.31 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log
index 461cbc2..9f2e80d 100644
--- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log
+++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 450.26 ± 1.46 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.56 ± 0.03 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 449.86 ± 1.68 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 34.19 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
index 2219116..9fa4616 100644
--- a/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 499.80 ± 1.95 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.18 ± 0.01 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 496.21 ± 1.71 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.64 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log
index c45dee8..71ff53d 100644
--- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log
+++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 230.22 ± 0.76 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.16 ± 0.01 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 230.09 ± 0.83 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 33.57 ± 0.02 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log
index 718febd..547d915 100644
--- a/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log
+++ b/benchmark/results/gpt-oss-120b-F16__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 243.20 ± 1.11 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.15 ± 0.02 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 243.96 ± 0.96 |
+| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 33.79 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
index c520d12..360ed4e 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.37 ± 0.72 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.11 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 353.20 ± 0.30 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.42 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
index 9dc5fe3..6969d6b 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
@@ -2,5 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3c5a6050) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 387.10 ± 0.42 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.16 ± 0.01 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
index 03da684..32eae28 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x8bc5050) reason :GPU Hang
+HW Exception by GPU node-1 (Agent handle: 0x2bea6540) reason :GPU Hang
✖ ! [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
index 97f0889..1d395f6 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 319.23 ± 0.62 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.79 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 319.84 ± 0.73 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.43 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..f3ebd8d
--- /dev/null
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 606.86 ± 5.18 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.02 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
index 769bedc..78fca14 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 246.76 ± 0.35 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.67 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 732.72 ± 4.06 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 45.14 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
index 3f432b9..0436056 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.53 ± 0.62 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.05 ± 0.08 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 351.42 ± 1.56 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.39 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 57%
rename from benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
index 1848af6..4fe67c3 100644
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -2,5 +2,5 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1fec7050) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M failed (exit 134)
+HW Exception by GPU node-1 (Agent handle: 0x3273c340) reason :GPU Hang
+✖ ! [rocm6_4_3-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log
new file mode 100644
index 0000000..f5b3307
--- /dev/null
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 608.20 ± 7.04 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.40 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log
index fa4767b..7a9d128 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 408.50 ± 1.91 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.69 ± 0.18 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 533.95 ± 3.58 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.03 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log
index c1f2f78..3f1f7ba 100644
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.45 ± 1.22 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 44.12 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 352.53 ± 0.81 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.41 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log
similarity index 68%
rename from benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
rename to benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log
index bbf6d17..fa1e33d 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 539.93 ± 1.23 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.01 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 320.78 ± 0.96 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.49 ± 0.03 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index 88c729a..0000000
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 589.45 ± 4.75 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.00 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log
deleted file mode 100644
index 1b8a39f..0000000
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x261760b0) reason :GPU Hang
-✖ ! [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0 failed (exit 134)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 2a30ca3..0000000
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 323.04 ± 0.94 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.01 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log
index 77cb354..17a14c7 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 586.82 ± 5.23 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 44.72 ± 0.30 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 635.84 ± 5.72 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log
index ae5c27f..8695248 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 684.17 ± 67.05 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.14 ± 0.27 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 708.36 ± 12.96 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
index c0da5bb..117d484 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 350.89 ± 1.88 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 44.93 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 650.68 ± 9.08 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.26 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
index 4973b6a..173cfbe 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -2,4 +2,9 @@ ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __hblt0__fa1 failed (exit 134)
+| model | size | params | backend | ngl | fa | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 734.35 ± 10.26 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.85 ± 0.00 |
+
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
index 933469d..a5dcda7 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 589.82 ± 5.12 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.12 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 646.07 ± 6.86 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.50 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
index 456548d..c8991e9 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 540.27 ± 2.82 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 43.89 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 541.57 ± 3.26 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.31 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log
index b3222aa..f2a91fb 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 354.60 ± 1.20 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.04 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | pp512 | 657.58 ± 3.78 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 0 | tg128 | 45.56 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log
index b82d07b..698f338 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 319.46 ± 0.48 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 43.90 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | pp512 | 550.79 ± 2.99 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 99 | 1 | 0 | tg128 | 44.41 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
index 7d5b354..9d4e9e1 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 488.47 ± 2.30 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 48.21 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 485.54 ± 2.45 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 49.29 ± 0.03 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
index 3441dc1..c4cd434 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 547.53 ± 3.03 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 47.49 ± 0.08 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 540.81 ± 2.56 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 48.25 ± 0.03 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
index f6cbe94..acd5b3a 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 239.44 ± 1.23 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 49.15 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | pp512 | 239.24 ± 1.27 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 0 | tg128 | 50.39 ± 0.05 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
index 5538ed2..f04d91c 100644
--- a/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
+++ b/benchmark/results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 255.37 ± 1.68 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 49.31 ± 0.08 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | pp512 | 255.50 ± 1.49 |
+| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 99 | 1 | 0 | tg128 | 50.41 ± 0.04 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
index f7c6172..6698000 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.31 ± 4.50 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.87 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.30 ± 4.23 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.10 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
index 020f7b9..5f3dda7 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 343.30 ± 5.27 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.76 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 342.14 ± 4.83 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.05 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log
index 88a7e15..5e1169a 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 322.55 ± 4.18 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 24.90 ± 0.02 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 324.36 ± 4.35 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.12 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log
index 41e5d8b..a1a5a05 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 304.86 ± 3.77 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.58 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 304.23 ± 3.73 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.85 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..45d2dea
--- /dev/null
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1198.51 ± 10.39 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log
index 266806c..219d081 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 257.11 ± 2.63 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.47 ± 0.08 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1506.44 ± 7.03 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.10 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log
index e1b0205..2c0a1a3 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 324.54 ± 4.39 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.87 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 326.80 ± 4.56 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.13 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log
index 8e851e8..b04117d 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 380.87 ± 8.21 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.79 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 350.18 ± 5.10 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.09 ± 0.00 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log
new file mode 100644
index 0000000..8d5aeeb
--- /dev/null
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+hipBLASLt error: Heuristic Fetch Failed!
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1185.57 ± 6.55 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.12 ± 0.01 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log
similarity index 69%
rename from benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log
index 3123235..4138942 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1011.32 ± 4.33 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.65 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1000.77 ± 2.37 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.83 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log
index d9bd7eb..76ec711 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 323.86 ± 4.33 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.27 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 322.00 ± 4.37 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.14 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log
similarity index 69%
rename from benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log
index 41d7fd7..9ce8ee4 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 301.30 ± 4.81 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.65 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 303.26 ± 4.84 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.90 ± 0.00 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log
deleted file mode 100644
index c97bff7..0000000
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1135.90 ± 9.10 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.88 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log
deleted file mode 100644
index c01b9a9..0000000
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 313.05 ± 6.96 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.86 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log
index 1912f8c..d80b5e6 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1130.14 ± 7.45 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.84 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1256.75 ± 10.54 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log
index 5046a32..2648480 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1502.62 ± 12.84 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.67 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1481.17 ± 9.67 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.03 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log
index c83d4a2..9c6df9d 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 319.92 ± 6.39 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.83 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1202.19 ± 5.53 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.10 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log
index fe7a810..8b68b31 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 338.36 ± 5.02 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.71 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1422.90 ± 11.48 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 27.04 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log
index 1c550a3..8a71c43 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1130.86 ± 14.88 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.89 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1253.01 ± 23.20 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.11 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log
index a59b260..eaf7b98 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1007.82 ± 22.14 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.66 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1005.24 ± 32.45 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.89 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log
index 84a8d46..685527d 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 321.80 ± 6.18 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 26.83 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1220.02 ± 12.30 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 27.17 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log
index ec0c58d..18915d7 100644
--- a/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 302.84 ± 5.01 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.61 ± 0.00 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 985.58 ± 10.64 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 26.88 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log
index 6875f68..381cc89 100644
--- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log
+++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 369.60 ± 1.30 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 8.72 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 367.61 ± 1.90 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 8.69 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
index b4f9322..4212c20 100644
--- a/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 389.96 ± 1.87 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 8.70 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 386.12 ± 1.98 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 8.66 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log
index ad6bfa4..a3f7dda 100644
--- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log
+++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 318.04 ± 1.50 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 7.89 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 315.56 ± 1.40 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 7.86 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log
index 072c052..257f941 100644
--- a/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log
+++ b/benchmark/results/gpt-oss-20b-F32__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 334.64 ± 1.46 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 7.90 ± 0.01 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 333.31 ± 1.47 |
+| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 7.92 ± 0.01 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
index 59744c9..28cbd07 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 581.92 ± 2.00 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.34 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 582.60 ± 4.90 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.91 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
index 97f911b..985eaf7 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 642.40 ± 3.59 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.74 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 644.05 ± 3.87 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.63 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log
index 2bd619e..c8bf125 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 582.94 ± 2.35 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.35 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 581.11 ± 2.96 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.00 ± 0.02 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
index 8a01f71..320f480 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 522.14 ± 1.92 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.97 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 522.29 ± 2.36 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.63 ± 0.00 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log
new file mode 100644
index 0000000..2c92204
--- /dev/null
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1184.03 ± 8.37 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.07 ± 0.00 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log
index cb2c45b..cd2e38f 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 649.48 ± 3.21 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 64.18 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1480.28 ± 9.38 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.45 ± 0.02 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log
index c377132..f7e992e 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 580.83 ± 2.46 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.47 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.04 ± 2.52 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.87 ± 0.02 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log
similarity index 78%
rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log
index 34d817d..2016c11 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.67 ± 1.08 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.97 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 643.25 ± 3.86 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.67 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log
new file mode 100644
index 0000000..d5473e4
--- /dev/null
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3.log
@@ -0,0 +1,15 @@
+ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
+ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
+ggml_cuda_init: found 1 ROCm devices:
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+rocBLAS error: No hipBLASLt solution found
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_HIPBLASLT_ERROR environment variable is set.
+
+rocBLAS warning: hipBlasLT failed, falling back to tensile.
+This message will be only be displayed once, unless the ROCBLAS_VERBOSE_TENSILE_ERROR environment variable is set.
+| model | size | params | backend | ngl | mmap | test | t/s |
+| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1171.02 ± 7.04 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.94 ± 0.04 |
+
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log
similarity index 69%
rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log
index 9c6c567..abd5fd2 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1005.66 ± 1.52 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.07 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 996.31 ± 6.53 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.68 ± 0.01 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log
similarity index 79%
rename from benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log
index 343b2b0..be6782a 100644
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm6_4_2.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 582.89 ± 2.32 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.45 ± 0.02 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 582.51 ± 2.41 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.89 ± 0.01 |
-build: cd6983d5 (6119)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log
similarity index 68%
rename from benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log
rename to benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log
index 661d58f..90bc0d7 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log
@@ -1,10 +1,10 @@
ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
+ Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 528.92 ± 2.02 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.00 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 522.63 ± 1.74 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.66 ± 0.03 |
-build: 79c1160b (6123)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log
deleted file mode 100644
index 7ccf6e0..0000000
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1128.54 ± 2.40 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.39 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log
deleted file mode 100644
index 57a687a..0000000
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 585.03 ± 1.84 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.36 ± 0.01 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log
index f49d7ff..8e4bdde 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1124.54 ± 9.14 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.19 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1236.64 ± 11.20 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.78 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log
index 82390bf..c87a02c 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 1474.70 ± 11.50 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.31 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1460.58 ± 11.92 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.26 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log
index b1e54db..2dd9efb 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 583.69 ± 2.09 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.26 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1299.34 ± 7.77 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.85 ± 0.00 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log
index 3068d5b..fc2acae 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 642.92 ± 1.97 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.28 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1516.33 ± 21.51 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 64.40 ± 0.01 |
-build: 34c9d765 (6122)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log
index d848311..34dc4e8 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 1125.60 ± 1.90 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.35 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1246.14 ± 8.32 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.15 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
index bd4d588..f6feec3 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 997.74 ± 8.16 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.00 ± 0.00 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1010.38 ± 6.35 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.49 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log
index 8b8a81a..425b7fd 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 584.02 ± 1.44 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 64.50 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | pp512 | 1303.74 ± 6.94 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 0 | tg128 | 65.10 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log
index d0567df..0bd1151 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log
@@ -4,7 +4,7 @@ ggml_cuda_init: found 1 ROCm devices:
Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 525.48 ± 1.39 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.04 ± 0.01 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | pp512 | 1037.92 ± 11.67 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 99 | 1 | 0 | tg128 | 63.63 ± 0.01 |
-build: 79c1160b (6123)
+build: de219279 (6181)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
index ec3e361..10fffaa 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 1218.18 ± 8.08 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 69.76 ± 0.07 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 1220.69 ± 8.95 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 71.42 ± 0.20 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
index fbda2c7..185b49c 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 1482.59 ± 12.76 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 68.63 ± 0.11 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 1467.61 ± 12.70 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 69.47 ± 0.09 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log
index a2342cc..cf40790 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 649.86 ± 5.16 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 70.72 ± 0.04 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | pp512 | 651.21 ± 5.24 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 0 | tg128 | 72.35 ± 0.08 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
index d1051a1..ed9a28f 100644
--- a/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
+++ b/benchmark/results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
@@ -2,7 +2,7 @@ ggml_vulkan: Found 1 Vulkan devices:
ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
| model | size | params | backend | ngl | fa | mmap | test | t/s |
| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 728.71 ± 8.40 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 70.49 ± 0.04 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | pp512 | 732.35 ± 7.51 |
+| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 99 | 1 | 0 | tg128 | 72.05 ± 0.07 |
-build: 34c9d765 (6122)
+build: 1fe00296 (6182)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
deleted file mode 100644
index 15e8d8f..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.76 ± 0.04 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.48 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
deleted file mode 100644
index ab19f84..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.09 ± 0.10 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
deleted file mode 100644
index 9bd37ad..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 81.54 ± 0.11 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log
deleted file mode 100644
index b92eb71..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2595b0b0) reason :GPU Hang
-✖ ! [rocm7_beta] llama3.3-70.6B-Q4_K_M __hblt0 failed (exit 134)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log
deleted file mode 100644
index 3fdb6c0..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 31.63 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 4.62 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log
deleted file mode 100644
index 80974fd..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.41 ± 0.11 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log
deleted file mode 100644
index 928b750..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 106.70 ± 0.12 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.60 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log
deleted file mode 100644
index d78ff89..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | pp512 | 33.87 ± 0.08 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log
deleted file mode 100644
index 1757ab6..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | pp512 | 34.48 ± 0.05 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 99 | 1 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
deleted file mode 100644
index 5075f85..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 99.16 ± 0.09 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
deleted file mode 100644
index afc44e9..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 81.56 ± 0.09 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.62 ± 0.00 |
-
-build: 79c1160b (6123)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log
deleted file mode 100644
index 9718332..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] llama3.3-70.6B-Q4_K_M __hblt0 failed (exit 134)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log
deleted file mode 100644
index 9d5bb3c..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] llama3.3-70.6B-Q4_K_M __hblt0__fa1 failed (exit 134)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
deleted file mode 100644
index f70a707..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 72.73 ± 0.05 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.08 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
deleted file mode 100644
index d13dc99..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 73.47 ± 0.03 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.04 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
deleted file mode 100644
index 5ccc1cf..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 78.79 ± 0.21 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.04 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
deleted file mode 100644
index 375bd60..0000000
--- a/benchmark/results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 80.58 ± 0.13 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.03 ± 0.00 |
-
-build: 34c9d765 (6122)
diff --git a/benchmark/results/run_benchmarks.log b/benchmark/results/run_benchmarks.log
deleted file mode 100644
index b9e965c..0000000
--- a/benchmark/results/run_benchmarks.log
+++ /dev/null
@@ -1,1392 +0,0 @@
-Found 19 model(s) to bench:
- • /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
- • /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
- • /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
- • /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
- • /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
- • /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
- • /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
- • /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
- • /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
- • /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
- • /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
- • /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
- • /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
- • /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
- * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED
-
-â–¶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
- * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
- * [rocm6_4_2-rocwmma] gpt-oss-120b-F16 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
- * [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm7_rc] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm7_beta] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm7_beta] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [vulkan_radv] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [vulkan_radv] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
- * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
- * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL __fa1
- → log: results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
deleted file mode 100644
index 268535b..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x834aa90) on address 0x7f10fb96f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 52deb8e..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x100d3790) reason :GPU Hang
-✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 8039123..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x13829790) on address 0x7fa8ef9a9000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index fcf0f01..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 130.17 ± 0.38 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 19.83 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 4ef718e..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 200.76 ± 0.32 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.78 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 4bbf6de..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 201.86 ± 0.27 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 22.83 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index 90347e7..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.73 ± 0.23 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 22.88 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index cf98168..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 132.54 ± 0.34 |
-| glm4moe 106B.A12B Q4_K - Medium | 68.01 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 23.31 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index a0c808c..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2f508a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index 273166e..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 120.54 ± 0.30 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index c23fe13..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2a849790) reason :GPU Hang
-✖ ! [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
deleted file mode 100644
index 5fbf5b3..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | pp512 | 124.18 ± 0.48 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | ROCm | 999 | 0 | tg128 | 15.49 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
deleted file mode 100644
index 28ae734..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
deleted file mode 100644
index 4247170..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 223.02 ± 0.69 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.47 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
deleted file mode 100644
index e3bc753..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 224.54 ± 0.65 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.49 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
deleted file mode 100644
index 5f0ace5..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | pp512 | 127.36 ± 0.46 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 0 | tg128 | 16.78 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
deleted file mode 100644
index 1973a52..0000000
--- a/benchmark/results_08-08-2025/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | pp512 | 131.78 ± 0.46 |
-| glm4moe 106B.A12B Q6_K | 94.57 GiB | 110.47 B | Vulkan | 999 | 1 | 0 | tg128 | 16.99 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
deleted file mode 100644
index 135d108..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x121f0a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index 29b2095..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x17018a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
deleted file mode 100644
index 08dae7b..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x11442a90) reason :GPU Hang
-✖ ! [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index e01b520..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xa636790) reason :GPU Hang
-✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 2f2342b..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1417b7b0) reason :GPU Hang
-✖ ! [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index c479337..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | pp512 | 33.30 ± 0.04 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 0 | tg128 | 2.64 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index 7b0ea20..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | pp512 | 31.09 ± 0.02 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | ROCm | 999 | 1 | 0 | tg128 | 2.65 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 4581b23..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2491416576 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 failed (exit 1)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 8835330..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2491416576 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 1)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index c6c72c5..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | pp512 | 78.70 ± 0.20 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 0 | tg128 | 2.66 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index ea12120..0000000
--- a/benchmark/results_08-08-2025/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | pp512 | 81.29 ± 0.14 |
-| qwen2 70B Q8_0 | 78.21 GiB | 72.71 B | Vulkan | 999 | 1 | 0 | tg128 | 2.66 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index 3de552f..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1496da90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 2acc073..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0xfeef7b0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 7a57ad3..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x6d017c0) on address 0x7f967f1a9000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index b9ba150..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index c55bab8..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index eb3efec..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 98.14 ± 0.14 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 966e109..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 99.24 ± 0.16 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.72 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index 80c3a0e..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 80.11 ± 0.09 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 2.73 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index 5826f3e..0000000
--- a/benchmark/results_08-08-2025/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 82.90 ± 0.14 |
-| llama 70B Q8_0 | 75.65 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 2.73 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
deleted file mode 100644
index 40f418b..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x28bb9a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index a94cdd6..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x194fea90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index 6d3b4ea..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x17ad57b0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 107b01e..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x2314b7b0) on address 0x7f38249a9000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index ccf7ac1..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 135.25 ± 0.50 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 14.43 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index 8df0b3e..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index dc80b9d..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 243.45 ± 1.29 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.29 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 08242f2..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 247.48 ± 1.28 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.03 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index ba7a655..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 148.25 ± 0.91 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 15.21 ± 0.06 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index 14f12dd..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 149.82 ± 0.83 |
-| llama4 17Bx16E (Scout) Q6_K | 82.35 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 15.21 ± 0.04 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
deleted file mode 100644
index 2faeaa3..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x9ae6a90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index 6ff4745..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x6e9ba90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index c768b8e..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x4081f7b0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index 98c472e..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3c0f27b0) reason :GPU Hang
-✖ ! [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
deleted file mode 100644
index 9c06e2b..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
deleted file mode 100644
index 3ccfa82..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
deleted file mode 100644
index 3bdeae7..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 258.18 ± 1.38 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.23 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 2060565..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 260.16 ± 1.44 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.09 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
deleted file mode 100644
index d9b6ebc..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 168.63 ± 0.81 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 12.26 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
deleted file mode 100644
index 579e532..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 172.37 ± 0.92 |
-| llama4 17Bx16E (Scout) Q8_0 | 106.65 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.25 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
deleted file mode 100644
index 070646e..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x1a40fa90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index 3fa46c3..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2e0ffa90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
deleted file mode 100644
index 9d0c061..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x3a741a90) reason :GPU Hang
-✖ ! [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index fb93137..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | pp512 | 138.90 ± 0.66 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | ROCm | 999 | 0 | tg128 | 17.62 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index 2e1a6fc..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 75ac351..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 218.18 ± 0.83 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 20.04 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index a745a31..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 221.15 ± 0.74 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.58 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index 4b78701..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | pp512 | 152.21 ± 0.66 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 0 | tg128 | 19.98 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index ee535dc..0000000
--- a/benchmark/results_08-08-2025/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | pp512 | 155.22 ± 1.09 |
-| llama4 17Bx16E (Scout) Q4_K - Medium | 57.73 GiB | 107.77 B | Vulkan | 999 | 1 | 0 | tg128 | 19.93 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
deleted file mode 100644
index aa6dfe3..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x153dfa90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index e2df164..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x2bd2ba90) reason :GPU Hang
-✖ ! [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index b5a6749..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x513c7b0) reason :GPU Hang
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index 7826050..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x2567c7c0) on address 0x7ee66236f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
deleted file mode 100644
index dbd9c47..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
deleted file mode 100644
index 57b950a..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
deleted file mode 100644
index af5c138..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 114.49 ± 0.60 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 15.98 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 19e5e37..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 116.07 ± 0.64 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 15.84 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
deleted file mode 100644
index 2aefda4..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | pp512 | 64.85 ± 0.38 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 0 | tg128 | 16.58 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
deleted file mode 100644
index c0359f0..0000000
--- a/benchmark/results_08-08-2025/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | pp512 | 66.76 ± 0.43 |
-| qwen3moe 235B.A22B Q3_K - Medium | 96.99 GiB | 235.09 B | Vulkan | 999 | 1 | 0 | tg128 | 16.83 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index ea26bd0..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 153.49 ± 1.19 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.52 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index bb2103f..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 138.49 ± 2.52 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.35 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index e446a9b..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 152.26 ± 2.41 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index d73c640..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 137.52 ± 1.75 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 24.33 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 1687c7e..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 107.48 ± 0.16 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.04 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index a9a752b..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.64 ± 0.13 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.96 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index ccca043..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.97 ± 0.12 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.38 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index 48148ef..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 87.05 ± 0.10 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
deleted file mode 100644
index a3987ef..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 390.07 ± 0.40 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.19 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
deleted file mode 100644
index a9ca9ef..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 300.60 ± 2.31 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.78 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
deleted file mode 100644
index 8ff09a7..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 388.99 ± 1.86 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 50.31 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
deleted file mode 100644
index db6f9b0..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | pp512 | 302.87 ± 0.88 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | ROCm | 999 | 1 | 0 | tg128 | 49.90 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
deleted file mode 100644
index 51b45f0..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 736.95 ± 3.72 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 56.89 ± 0.26 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 3f2a08e..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 727.71 ± 2.81 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 53.34 ± 0.31 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
deleted file mode 100644
index 5140ff3..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 395.16 ± 1.55 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 58.95 ± 0.45 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
deleted file mode 100644
index 6bbc4f2..0000000
--- a/benchmark/results_08-08-2025/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 405.61 ± 1.85 |
-| qwen3moe 30B.A3B Q6_K | 24.53 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 58.06 ± 0.28 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
deleted file mode 100644
index 6625574..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.50 ± 1.69 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.55 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
deleted file mode 100644
index 222959d..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 150.34 ± 1.74 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.14 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
deleted file mode 100644
index cc48f94..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 146.55 ± 1.77 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.54 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 285bed2..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-Memory access fault by GPU node-1 (Agent handle: 0x2bd8a7b0) on address 0x7fe0b0d6f000. Reason: Page not present or supervisor privilege.
-✖ ! [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index 29fa537..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | pp512 | 145.91 ± 1.76 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | ROCm | 999 | 0 | tg128 | 24.57 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index 1416318..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index 65ecb3e..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 106.99 ± 0.10 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 8.03 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 2b69233..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 107.10 ± 0.08 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.98 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index 3a2d167..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | pp512 | 85.50 ± 0.06 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 0 | tg128 | 7.42 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index 9132fa2..0000000
--- a/benchmark/results_08-08-2025/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | pp512 | 86.52 ± 0.06 |
-| qwen3moe 30B.A3B BF16 | 56.89 GiB | 30.53 B | Vulkan | 999 | 1 | 0 | tg128 | 7.40 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
deleted file mode 100644
index 6493650..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.67 ± 0.37 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.88 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
deleted file mode 100644
index a535d64..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 203.12 ± 0.35 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.60 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
deleted file mode 100644
index f1ec100..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | pp512 | 222.49 ± 0.29 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 0 | tg128 | 13.86 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
deleted file mode 100644
index f4493e0..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | pp512 | 201.47 ± 0.21 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | ROCm | 999 | 1 | 0 | tg128 | 13.61 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
deleted file mode 100644
index 5ac352f..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 676.94 ± 0.85 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.99 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
deleted file mode 100644
index b3193bd..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 371.17 ± 0.24 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 12.30 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
deleted file mode 100644
index b620676..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | pp512 | 503.27 ± 1.09 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 0 | tg128 | 13.76 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
deleted file mode 100644
index 5e9431a..0000000
--- a/benchmark/results_08-08-2025/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | pp512 | 495.99 ± 2.36 |
-| gemma3 12B Q8_0 | 13.40 GiB | 11.77 B | Vulkan | 999 | 1 | 0 | tg128 | 13.61 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
deleted file mode 100644
index c646996..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
+++ /dev/null
@@ -1,6 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-HW Exception by GPU node-1 (Agent handle: 0x10c4a90) reason :GPU Hang
-✖ ! [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
deleted file mode 100644
index 18449f7..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.61 ± 0.31 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
deleted file mode 100644
index 9e9f25c..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | pp512 | 55.68 ± 0.47 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 0 | tg128 | 3.11 ± 0.98 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
deleted file mode 100644
index f7ce012..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | pp512 | 83.08 ± 0.42 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | ROCm | 999 | 1 | 0 | tg128 | 4.04 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
deleted file mode 100644
index d74242d..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2819260416 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 failed (exit 1)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
deleted file mode 100644
index a667917..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-ggml_vulkan: Device memory allocation of size 2819260416 failed.
-ggml_vulkan: Requested buffer size exceeds device memory allocation limit: ErrorOutOfDeviceMemory
-main: error: failed to load model '/mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf'
-✖ ! [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 failed (exit 1)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
deleted file mode 100644
index 0c3a407..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | pp512 | 135.58 ± 0.45 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 0 | tg128 | 4.00 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
deleted file mode 100644
index f2077af..0000000
--- a/benchmark/results_08-08-2025/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | pp512 | 138.61 ± 0.55 |
-| gemma3 27B BF16 | 50.31 GiB | 27.01 B | Vulkan | 999 | 1 | 0 | tg128 | 4.00 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
deleted file mode 100644
index ea76e52..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 732.13 ± 1.42 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.23 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
deleted file mode 100644
index 76e9619..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | pp512 | 652.29 ± 0.45 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 1 | 0 | tg128 | 69.62 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
deleted file mode 100644
index ce94640..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | pp512 | 730.59 ± 1.69 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | ROCm | 999 | 0 | tg128 | 76.01 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
deleted file mode 100644
index b707702..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1614.72 ± 4.91 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 84.00 ± 0.23 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 6055d96..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 942.34 ± 1.76 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 57.70 ± 0.22 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
deleted file mode 100644
index 5a56858..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | pp512 | 1527.75 ± 3.86 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 0 | tg128 | 85.54 ± 0.99 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
deleted file mode 100644
index ab5608b..0000000
--- a/benchmark/results_08-08-2025/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | pp512 | 1489.57 ± 4.71 |
-| gemma3 4B Q3_K - Small | 1.80 GiB | 3.88 B | Vulkan | 999 | 1 | 0 | tg128 | 80.63 ± 0.22 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log
deleted file mode 100644
index 4e2c281..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 357.38 ± 0.76 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.62 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log
deleted file mode 100644
index 63dd9d9..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 356.67 ± 0.74 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 33.68 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log
deleted file mode 100644
index 8096c36..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 247.49 ± 0.65 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 33.07 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log
deleted file mode 100644
index 755a9cf..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 448.17 ± 1.37 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.39 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 152170f..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 498.69 ± 2.19 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.06 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log
deleted file mode 100644
index 5ab95e4..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 229.59 ± 0.74 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 33.08 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log
deleted file mode 100644
index 9d830ae..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-F16__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 243.40 ± 0.99 |
-| gpt-oss ?B F16 | 60.87 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 33.07 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
deleted file mode 100644
index 3892e39..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 354.82 ± 1.02 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.00 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
deleted file mode 100644
index 69476e2..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | pp512 | 248.22 ± 0.50 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 1 | 0 | tg128 | 44.05 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
deleted file mode 100644
index 3a57ced..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | pp512 | 353.20 ± 0.59 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | ROCm | 999 | 0 | tg128 | 45.15 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
deleted file mode 100644
index 93e7fca..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,5 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-✖ ! [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 failed (exit 134)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
deleted file mode 100644
index d229658..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 486.90 ± 2.23 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 48.08 ± 0.03 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
deleted file mode 100644
index b556c96..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 546.41 ± 2.88 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 47.25 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
deleted file mode 100644
index 802c652..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | pp512 | 239.72 ± 1.23 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 0 | tg128 | 49.01 ± 0.06 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
deleted file mode 100644
index 6b8a8c4..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | pp512 | 255.17 ± 1.65 |
-| gpt-oss ?B MXFP4 MoE | 59.02 GiB | 116.83 B | Vulkan | 999 | 1 | 0 | tg128 | 48.93 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log
deleted file mode 100644
index d76138e..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 322.43 ± 2.59 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.89 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log
deleted file mode 100644
index 6dd4954..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.08 ± 3.99 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log
deleted file mode 100644
index 67b820b..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 319.36 ± 3.07 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 26.88 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log
deleted file mode 100644
index e07a069..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 254.87 ± 2.27 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 26.62 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log
deleted file mode 100644
index 52536d1..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 369.69 ± 1.79 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 8.59 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 974e845..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 389.86 ± 2.13 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 8.58 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log
deleted file mode 100644
index 7decf08..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 319.09 ± 1.46 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 7.79 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log
deleted file mode 100644
index a9ce691..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-F32__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 335.15 ± 1.80 |
-| gpt-oss ?B BF16 | 38.97 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 7.79 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log
deleted file mode 100644
index 441cec1..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 583.52 ± 2.76 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.39 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
deleted file mode 100644
index e5f1e99..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 396.75 ± 0.60 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 62.98 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log
deleted file mode 100644
index 97fab79..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | pp512 | 581.83 ± 1.10 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 0 | tg128 | 64.50 ± 0.02 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
deleted file mode 100644
index 3e34f41..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | pp512 | 394.87 ± 0.73 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | ROCm | 999 | 1 | 0 | tg128 | 63.06 ± 0.01 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
deleted file mode 100644
index 2d4b788..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 1205.02 ± 7.18 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 68.84 ± 0.04 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 9a5c4c5..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 1472.56 ± 14.39 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 67.78 ± 0.18 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log
deleted file mode 100644
index f400d0f..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | pp512 | 648.85 ± 6.28 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 0 | tg128 | 69.88 ± 0.04 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
deleted file mode 100644
index 1959c7e..0000000
--- a/benchmark/results_08-08-2025/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | pp512 | 728.38 ± 8.17 |
-| gpt-oss ?B MXFP4 MoE | 11.27 GiB | 20.91 B | Vulkan | 999 | 1 | 0 | tg128 | 69.80 ± 0.05 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
deleted file mode 100644
index e9da9da..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.47 ± 0.04 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.62 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
deleted file mode 100644
index 0388774..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 34.51 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
deleted file mode 100644
index 01f32df..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.79 ± 0.03 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.52 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
deleted file mode 100644
index f9ae86b..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: Radeon 8060S Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.04 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
deleted file mode 100644
index f6959d1..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.88 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
deleted file mode 100644
index 2869c45..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.67 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
deleted file mode 100644
index 6bd1b01..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | pp512 | 33.91 ± 0.03 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 0 | tg128 | 4.61 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
deleted file mode 100644
index 77dd920..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
+++ /dev/null
@@ -1,10 +0,0 @@
-ggml_cuda_init: GGML_CUDA_FORCE_MMQ: no
-ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
-ggml_cuda_init: found 1 ROCm devices:
- Device 0: AMD Radeon Graphics, gfx1151 (0x1151), VMM: no, Wave Size: 32
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | pp512 | 31.66 ± 0.04 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | ROCm | 999 | 1 | 0 | tg128 | 4.63 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
deleted file mode 100644
index bc604f8..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 72.75 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.03 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
deleted file mode 100644
index 7ac44cb..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (AMD open-source driver) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 32768 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 73.57 ± 0.02 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 5.00 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
deleted file mode 100644
index 4cc5212..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | pp512 | 78.99 ± 0.18 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 0 | tg128 | 5.00 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log b/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
deleted file mode 100644
index 869327e..0000000
--- a/benchmark/results_08-08-2025/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
+++ /dev/null
@@ -1,8 +0,0 @@
-ggml_vulkan: Found 1 Vulkan devices:
-ggml_vulkan: 0 = Radeon 8060S Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 1 | matrix cores: KHR_coopmat
-| model | size | params | backend | ngl | fa | mmap | test | t/s |
-| ------------------------------ | ---------: | ---------: | ---------- | --: | -: | ---: | --------------: | -------------------: |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | pp512 | 80.92 ± 0.05 |
-| llama 70B Q4_K - Medium | 39.59 GiB | 70.55 B | Vulkan | 999 | 1 | 0 | tg128 | 4.99 ± 0.00 |
-
-build: cd6983d5 (6119)
diff --git a/benchmark/results_08-08-2025/run_benchmarks.log b/benchmark/results_08-08-2025/run_benchmarks.log
deleted file mode 100644
index 073dde1..0000000
--- a/benchmark/results_08-08-2025/run_benchmarks.log
+++ /dev/null
@@ -1,1153 +0,0 @@
-Found 18 model(s) to bench:
- • /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
- • /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
- • /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
- • /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
- • /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
- • /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
- • /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
- • /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
- • /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
- • /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
- • /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
- • /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
- • /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
- • /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
- * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 : FAILED
-
-â–¶ [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
- * [vulkan_amdvlk] gemma-3-27b-it-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf
-
- * [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2] gemma-3-27b-it-BF16-00001-of-00002 __fa1
- → log: results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/BF16/gemma-3-27b-it-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [vulkan_amdvlk] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf
-
-
-â–¶ [rocm6_4_2] gemma-3-12b-it-UD-Q8_K_XL __fa1
- → log: results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-12b-it-UD-Q8_K_XL.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm7_rc] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm7_beta] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [vulkan_radv] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [vulkan_amdvlk] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf
-
-
-â–¶ [rocm6_4_2] gemma-3-4b-it-Q3_K_S __fa1
- → log: results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gemma-3/gemma-3-4b-it-Q3_K_S.gguf -fa 1
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q4_K_XL/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm7_rc] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm7_beta] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1
- → log: results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/GLM-4.5-Air/UD-Q6_K_XL/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2] GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_rc] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-F16
- → log: results/gpt-oss-120b-F16__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-F16 __fa1
- → log: results/gpt-oss-120b-F16__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-F16.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
- * [rocm7_rc] gpt-oss-120b-mxfp4-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-120b-mxfp4-00001-of-00003 __fa1
- → log: results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-120b/gpt-oss-120b-mxfp4-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-F32
- → log: results/gpt-oss-20b-F32__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-F32 __fa1
- → log: results/gpt-oss-20b-F32__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-F32.gguf -fa 1
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm7_rc] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm7_beta] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [vulkan_radv] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [vulkan_amdvlk] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-mxfp4
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf
-
-
-â–¶ [rocm6_4_2] gpt-oss-20b-mxfp4 __fa1
- → log: results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/gpt-oss-20b/gpt-oss-20b-mxfp4.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [vulkan_amdvlk] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/kimi-dev-72B-Q8_K_XL/UD-Q8_K_XL/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002 __fa1
- → log: results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-70B-Instruct/UD-Q8_K_XL/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm7_rc] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm7_beta] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm7_beta] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [vulkan_radv] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [vulkan_radv] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [vulkan_amdvlk] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf
-
-
-â–¶ [rocm6_4_2] llama3.3-70.6B-Q4_K_M __fa1
- → log: results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-3.3-Q4_K_M/llama3.3-70.6B-Q4_K_M.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q4_K_XL/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q6_K/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002.gguf -fa 1
-
- * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED
-
-â–¶ [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm7_rc] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED
-
-â–¶ [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm7_beta] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1
- → log: results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/llama-4-scout-17b-16e/Q8_0/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2] Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
- * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm7_rc] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
- * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm7_beta] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
- * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2-rocwmma] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1
- → log: results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-235B-Q3_K-XL/UD-Q3_K_XL/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003.gguf -fa 1
-
- * [rocm6_4_2] Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003 __fa1 : FAILED
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-30B-A3B-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen-3-30B-A3B/BF16/Qwen3-30B-A3B-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log
- → cmd: toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm7_rc] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log
- → cmd: toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
- * [rocm7_beta] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1 : FAILED
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2-rocwmma] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_radv] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log
- → cmd: toolbox run -c llama-vulkan-radv -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [vulkan_amdvlk] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log
- → cmd: toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
-
-â–¶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf
-
-
-â–¶ [rocm6_4_2] Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002 __fa1
- → log: results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log
- → cmd: toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench -ngl 999 -mmp 0 -m /mnt/models/qwen3-coder-30B-A3B/BF16/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002.gguf -fa 1
-
diff --git a/benchmark/run_benchmarks.sh b/benchmark/run_benchmarks.sh
index b4d15e7..151e1d1 100755
--- a/benchmark/run_benchmarks.sh
+++ b/benchmark/run_benchmarks.sh
@@ -28,7 +28,8 @@ echo
declare -A CMDS=(
[rocm6_4_2]="toolbox run -c llama-rocm-6.4.2 -- /usr/local/bin/llama-bench"
[rocm6_4_2-rocwmma]="toolbox run -c llama-rocm-6.4.2-rocwmma -- /usr/local/bin/llama-bench"
- [rocm7_beta]="toolbox run -c llama-rocm-7beta -- /usr/local/bin/llama-bench"
+ [rocm6_4_3]="toolbox run -c llama-rocm-6.4.3 -- /usr/local/bin/llama-bench"
+ [rocm6_4_3-rocwmma]="toolbox run -c llama-rocm-6.4.3-rocwmma -- /usr/local/bin/llama-bench"
[rocm7_rc]="toolbox run -c llama-rocm-7rc -- /usr/local/bin/llama-bench"
[rocm7_rc-rocwmma]="toolbox run -c llama-rocm-7rc-rocwmma -- /usr/local/bin/llama-bench"
[vulkan_amdvlk]="toolbox run -c llama-vulkan-amdvlk -- /usr/sbin/llama-bench"
@@ -41,8 +42,8 @@ for MODEL_PATH in "${MODEL_PATHS[@]}"; do
for ENV in "${!CMDS[@]}"; do
CMD="${CMDS[$ENV]}"
- # For ROCm 7 envs, run default + HIPBLASLT=0 variants; others: default only
- if [[ "$ENV" == rocm7_* ]]; then
+ # For ROCm 6.4.3 and 7 envs, run default + HIPBLASLT=0 variants; others: default only
+ if [[ "$ENV" == rocm7_* || "$ENV" == rocm6_4_3* ]]; then
HBLT_MODES=( default off )
else
HBLT_MODES=( default )
diff --git a/docs/index.html b/docs/index.html
index b81fa7e..d8963c8 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -363,6 +363,7 @@
Repo: kyuz0/amd-strix-halo-toolboxes
+ Platform: Framework Desktop, 128GB Unified RAM (accelerator-performance tuned profile)
Loading meta…
@@ -491,7 +492,7 @@
`;
diff --git a/docs/results.json b/docs/results.json
index 22c4563..d05c75a 100644
--- a/docs/results.json
+++ b/docs/results.json
@@ -1,22 +1,24 @@
{
"meta": {
- "generated_at": "2025-08-10T11:20:41Z",
- "os_kernel": "Fedora 42 \u2014 Linux fedora 6.16.0-264.vanilla.fc42.x86_64 (2025-07-28)",
+ "generated_at": "2025-08-17T07:42:51Z",
+ "os_kernel": "Fedora 42 \u2014 Linux 6.15.9-201.fc42.x86_64 (Sat Aug 2 11:37:34 UTC 2025)",
"llamacpp_builds": [
{
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
},
{
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
],
"environments": [
"rocm6_4_2",
"rocm6_4_2-rocwmma",
- "rocm7_beta",
- "rocm7_beta-hblt0",
+ "rocm6_4_3",
+ "rocm6_4_3-hblt0",
+ "rocm6_4_3-rocwmma",
+ "rocm6_4_3-rocwmma-hblt0",
"rocm7_rc",
"rocm7_rc-hblt0",
"rocm7_rc-rocwmma",
@@ -38,7 +40,7 @@
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "hang",
+ "error_type": "runtime",
"backend": null,
"ngl": null,
"mmap": null,
@@ -56,106 +58,6 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": true,
- "test": "pp512",
- "tps_mean": 139.31,
- "tps_std": 0.13,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 19.97,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 130.07,
- "tps_std": 0.32,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 19.48,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -168,118 +70,168 @@
"file_size_gib": null,
"name_params_b": null,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
"build": null
},
{
"model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
"model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 124.5,
- "tps_std": 0.25,
+ "tps_mean": 131.14,
+ "tps_std": 0.28,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
"name_params_b": 110.47,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
"model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 20.02,
+ "tps_mean": 20.15,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 104.12,
+ "tps_std": 0.05,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 20.35,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
"name_params_b": 110.47,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
"model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 100.8,
- "tps_std": 0.14,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 20.13,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 130.22,
+ "tps_mean": 126.62,
+ "tps_std": 0.1,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 19.95,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 135.1,
"tps_std": 0.35,
"error": false,
"error_type": null,
@@ -290,21 +242,171 @@
"file_size_gib": 68.01,
"name_params_b": 110.47,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
"model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 20.14,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 130.99,
+ "tps_std": 0.36,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 20.0,
+ "tps_mean": 20.14,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 140.15,
+ "tps_std": 0.41,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 20.15,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 126.66,
+ "tps_std": 0.22,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 20.14,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -315,19 +417,69 @@
"file_size_gib": 68.01,
"name_params_b": 110.47,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
"model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
"fa": true,
+ "test": "pp512",
+ "tps_mean": 100.2,
+ "tps_std": 0.13,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 20.3,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -340,7 +492,29 @@
"file_size_gib": null,
"name_params_b": null,
"quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
"build": null
},
{
@@ -351,12 +525,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 120.16,
- "tps_std": 0.21,
+ "tps_mean": 117.48,
+ "tps_std": 0.53,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
@@ -364,8 +538,8 @@
"quant": "Q4_K_XL",
"log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -376,12 +550,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 19.96,
- "tps_std": 0.01,
+ "tps_mean": 20.11,
+ "tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
@@ -389,8 +563,8 @@
"quant": "Q4_K_XL",
"log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -401,12 +575,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 133.91,
- "tps_std": 0.57,
+ "tps_mean": 126.27,
+ "tps_std": 0.47,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
@@ -414,8 +588,8 @@
"quant": "Q4_K_XL",
"log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -426,12 +600,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 19.94,
+ "tps_mean": 19.86,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 68.01,
@@ -439,8 +613,8 @@
"quant": "Q4_K_XL",
"log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -451,507 +625,7 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 129.49,
- "tps_std": 0.48,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 19.95,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 138.34,
- "tps_std": 0.27,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 19.9,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 124.65,
- "tps_std": 0.23,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 19.91,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 100.9,
- "tps_std": 0.22,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 20.15,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 129.49,
- "tps_std": 0.14,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 19.88,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 103.73,
- "tps_std": 0.14,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 20.07,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 201.03,
- "tps_std": 0.31,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 22.82,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 201.89,
- "tps_std": 0.37,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 22.85,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 128.01,
- "tps_std": 0.31,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 22.92,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 132.56,
- "tps_std": 0.31,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 23.31,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 68.01,
- "name_params_b": 110.47,
- "quant": "Q4_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 124.75,
+ "tps_mean": 158.54,
"tps_std": 0.42,
"error": false,
"error_type": null,
@@ -959,13 +633,432 @@
"ngl": 99,
"mmap": 0,
"params_b": 110.47,
- "file_size_gib": 94.57,
+ "file_size_gib": 68.01,
"name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log",
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 20.11,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 166.11,
+ "tps_std": 0.32,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 19.83,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 89.6,
+ "tps_std": 0.2,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 20.22,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 64.66,
+ "tps_std": 0.16,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 20.35,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 197.95,
+ "tps_std": 0.29,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 23.24,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 199.4,
+ "tps_std": 0.35,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 23.26,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 126.28,
+ "tps_std": 0.17,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 23.33,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 131.64,
+ "tps_std": 0.32,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "GLM-4.5-Air-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 23.88,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 68.01,
+ "name_params_b": 110.47,
+ "quant": "Q4_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -975,23 +1068,20 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": false,
- "test": "tg128",
- "tps_mean": 15.43,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
@@ -1004,7 +1094,7 @@
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "hang",
+ "error_type": "runtime",
"backend": null,
"ngl": null,
"mmap": null,
@@ -1022,23 +1112,20 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 124.94,
- "tps_std": 0.42,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
@@ -1046,345 +1133,380 @@
"env": "rocm6_4_2",
"env_base": "rocm6_4_2",
"env_variant": null,
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 121.82,
+ "tps_std": 0.35,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 15.35,
+ "tps_mean": 15.59,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2.log",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
"model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 100.41,
- "tps_std": 0.16,
+ "tps_mean": 126.6,
+ "tps_std": 0.3,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
"model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 15.53,
+ "tps_mean": 15.62,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 117.95,
+ "tps_std": 0.3,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 15.65,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
"model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__fa1.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": null,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 118.61,
- "tps_std": 0.54,
+ "tps_mean": 69.19,
+ "tps_std": 0.2,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
"model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
+ "tps_mean": 15.64,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 114.61,
+ "tps_std": 0.2,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
"tps_mean": 15.51,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 90.24,
- "tps_std": 0.13,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 15.55,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 123.75,
- "tps_std": 0.39,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
"ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 15.48,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 118.92,
- "tps_std": 0.39,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 15.47,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 127.14,
- "tps_std": 0.27,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 15.47,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -1394,20 +1516,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
+ "test": "pp512",
+ "tps_mean": 120.88,
+ "tps_std": 0.92,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 15.61,
+ "tps_std": 0.09,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
@@ -1416,60 +1566,63 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
"test": "pp512",
- "tps_mean": 118.52,
- "tps_std": 0.35,
+ "tps_mean": 150.07,
+ "tps_std": 0.56,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
"name_params_b": 110.47,
"quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
"model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "rocm7_rc",
+ "env": "rocm7_rc-rocwmma-hblt0",
"env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
"test": "tg128",
"tps_mean": 15.52,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 69.52,
+ "tps_std": 0.17,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1477,8 +1630,33 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 15.63,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -1489,12 +1667,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 97.36,
- "tps_std": 0.07,
+ "tps_mean": 74.02,
+ "tps_std": 0.13,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1502,8 +1680,8 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -1514,12 +1692,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 15.57,
- "tps_std": 0.02,
+ "tps_mean": 15.73,
+ "tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1527,8 +1705,8 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -1538,20 +1716,48 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
+ "test": "pp512",
+ "tps_mean": 142.67,
+ "tps_std": 0.75,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 15.68,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
@@ -1583,12 +1789,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 223.59,
- "tps_std": 0.5,
+ "tps_mean": 219.81,
+ "tps_std": 0.7,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1596,8 +1802,8 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -1608,112 +1814,87 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 16.51,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 225.75,
- "tps_std": 0.69,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 16.53,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 127.35,
- "tps_std": 0.43,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 110.47,
- "file_size_gib": 94.57,
- "name_params_b": 110.47,
- "quant": "Q6_K_XL",
- "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
- "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
"tps_mean": 16.8,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 222.2,
+ "tps_std": 0.63,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 16.82,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 126.55,
+ "tps_std": 0.4,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1721,8 +1902,33 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003",
+ "model_clean": "GLM-4.5-Air-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 17.07,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 110.47,
+ "file_size_gib": 94.57,
+ "name_params_b": 110.47,
+ "quant": "Q6_K_XL",
+ "log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -1733,12 +1939,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 131.91,
- "tps_std": 0.42,
+ "tps_mean": 131.25,
+ "tps_std": 0.5,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1746,8 +1952,8 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -1758,12 +1964,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 17.02,
+ "tps_mean": 17.31,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 110.47,
"file_size_gib": 94.57,
@@ -1771,644 +1977,8 @@
"quant": "Q6_K_XL",
"log": "results/GLM-4.5-Air-UD-Q6_K_XL-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 33.87,
- "tps_std": 0.05,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 2.64,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 108.88,
- "tps_std": 0.21,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 109.02,
- "tps_std": 0.07,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 117.34,
- "tps_std": 0.09,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 2.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 109.17,
- "tps_std": 0.12,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "load",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "load",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 72.0,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": null
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 78.54,
- "tps_std": 0.14,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.67,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 81.12,
- "tps_std": 0.08,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Kimi-Dev-72B-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 2.67,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 72.71,
- "file_size_gib": 78.21,
- "name_params_b": 72.71,
- "quant": "Q8_K_XL",
- "log": "results/Kimi-Dev-72B-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -2462,161 +2032,11 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 33.28,
- "tps_std": 0.05,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.73,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 30.88,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 2.73,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 95.65,
- "tps_std": 0.23,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 2.74,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
"test": null,
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "runtime",
+ "error_type": "hang",
"backend": null,
"ngl": null,
"mmap": null,
@@ -2624,14 +2044,308 @@
"file_size_gib": null,
"name_params_b": 70.0,
"quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__fa1.log",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2.log",
"build": null
},
{
"model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
"model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 16.16,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 2.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 98.02,
+ "tps_std": 0.18,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 2.77,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 101.83,
+ "tps_std": 0.11,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 2.77,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 70.0,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 70.0,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 97.13,
+ "tps_std": 0.17,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 2.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 80.42,
+ "tps_std": 0.08,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 2.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": null,
@@ -2646,14 +2360,14 @@
"file_size_gib": null,
"name_params_b": 70.0,
"quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0.log",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0.log",
"build": null
},
{
"model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
"model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": null,
@@ -2668,7 +2382,7 @@
"file_size_gib": null,
"name_params_b": 70.0,
"quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
"build": null
},
{
@@ -2679,12 +2393,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 95.63,
- "tps_std": 0.19,
+ "tps_mean": 97.31,
+ "tps_std": 0.2,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2692,8 +2406,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2704,12 +2418,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 2.73,
+ "tps_mean": 2.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2717,8 +2431,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2729,12 +2443,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 103.15,
+ "tps_mean": 100.85,
"tps_std": 0.13,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2742,8 +2456,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2754,12 +2468,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 2.73,
+ "tps_mean": 2.77,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2767,8 +2481,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2778,20 +2492,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.0,
+ "test": "pp512",
+ "tps_mean": 93.0,
+ "tps_std": 0.22,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 2.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
@@ -2800,20 +2542,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.0,
+ "test": "pp512",
+ "tps_mean": 97.88,
+ "tps_std": 0.09,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 2.77,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
@@ -2823,12 +2593,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 95.15,
- "tps_std": 0.14,
+ "tps_mean": 99.41,
+ "tps_std": 0.36,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2836,8 +2606,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2848,12 +2618,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 2.74,
+ "tps_mean": 2.77,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2861,8 +2631,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -2894,6 +2664,56 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
+ "test": "pp512",
+ "tps_mean": 94.06,
+ "tps_std": 0.09,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 2.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 70.55,
+ "file_size_gib": 75.65,
+ "name_params_b": 70.55,
+ "quant": "Q8_K_XL",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
+ "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -2906,59 +2726,9 @@
"file_size_gib": null,
"name_params_b": 70.0,
"quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
+ "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
"build": null
},
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 30.04,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
- "model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 2.74,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 75.65,
- "name_params_b": 70.55,
- "quant": "Q8_K_XL",
- "log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
{
"model": "Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002",
"model_clean": "Llama-3.3-70B-Instruct-UD-Q8_K_XL",
@@ -2967,12 +2737,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 98.2,
- "tps_std": 0.18,
+ "tps_mean": 98.03,
+ "tps_std": 0.24,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -2980,8 +2750,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -2992,12 +2762,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 2.75,
+ "tps_mean": 2.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3005,8 +2775,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3017,12 +2787,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 99.14,
- "tps_std": 0.35,
+ "tps_mean": 99.12,
+ "tps_std": 0.25,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3030,8 +2800,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3042,12 +2812,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 2.74,
+ "tps_mean": 2.77,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3055,8 +2825,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3067,12 +2837,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 79.91,
- "tps_std": 0.16,
+ "tps_mean": 75.59,
+ "tps_std": 0.28,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3080,8 +2850,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3092,12 +2862,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 2.75,
+ "tps_mean": 2.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3105,8 +2875,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3117,12 +2887,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 82.4,
- "tps_std": 0.16,
+ "tps_mean": 80.09,
+ "tps_std": 0.38,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3130,8 +2900,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3142,12 +2912,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 2.75,
+ "tps_mean": 2.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 70.55,
"file_size_gib": 75.65,
@@ -3155,8 +2925,8 @@
"quant": "Q8_K_XL",
"log": "results/Llama-3.3-70B-Instruct-UD-Q8_K_XL-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3166,48 +2936,20 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": false,
- "test": "pp512",
- "tps_mean": 134.21,
- "tps_std": 0.58,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 14.43,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
@@ -3238,48 +2980,20 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 133.77,
- "tps_std": 0.46,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 14.3,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
@@ -3292,7 +3006,7 @@
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "runtime",
+ "error_type": "hang",
"backend": null,
"ngl": null,
"mmap": null,
@@ -3306,14 +3020,136 @@
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": null,
"tps_mean": null,
"tps_std": null,
"error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 291.08,
+ "tps_std": 1.26,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 14.53,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 134.19,
+ "tps_std": 1.49,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.56,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
"error_type": "hang",
"backend": null,
"ngl": null,
@@ -3322,14 +3158,64 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 270.28,
+ "tps_std": 1.29,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.58,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": true,
"test": null,
@@ -3344,14 +3230,14 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__fa1.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__fa1.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": null,
@@ -3366,19 +3252,63 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
"test": "pp512",
- "tps_mean": 103.96,
- "tps_std": 0.18,
+ "tps_mean": 285.84,
+ "tps_std": 9.41,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -3388,22 +3318,22 @@
"file_size_gib": 82.35,
"name_params_b": 107.77,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 14.47,
- "tps_std": 0.02,
+ "tps_mean": 14.37,
+ "tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -3413,110 +3343,10 @@
"file_size_gib": 82.35,
"name_params_b": 107.77,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 273.64,
- "tps_std": 0.59,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 14.43,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 293.87,
- "tps_std": 1.35,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 14.31,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -3526,20 +3356,23 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 273.97,
+ "tps_std": 1.67,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
@@ -3547,6 +3380,131 @@
"env": "rocm7_rc-rocwmma-hblt0",
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.57,
+ "tps_std": 0.05,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 285.26,
+ "tps_std": 1.79,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 14.33,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 276.37,
+ "tps_std": 1.65,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.57,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
"fa": true,
"test": null,
"tps_mean": null,
@@ -3560,109 +3518,9 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log",
"build": null
},
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 269.3,
- "tps_std": 1.99,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 14.43,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 225.7,
- "tps_std": 1.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 14.46,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
@@ -3671,8 +3529,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 135.16,
- "tps_std": 0.44,
+ "tps_mean": 269.17,
+ "tps_std": 0.99,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -3684,8 +3542,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -3696,8 +3554,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 14.41,
- "tps_std": 0.0,
+ "tps_mean": 14.63,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -3709,8 +3567,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -3743,12 +3601,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 243.54,
- "tps_std": 1.24,
+ "tps_mean": 242.07,
+ "tps_std": 1.05,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 82.35,
@@ -3756,8 +3614,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3768,12 +3626,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 15.34,
- "tps_std": 0.0,
+ "tps_mean": 15.56,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 82.35,
@@ -3781,8 +3639,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3793,112 +3651,112 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 246.48,
- "tps_std": 1.35,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 15.09,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 147.36,
- "tps_std": 0.8,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 15.3,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 82.35,
- "name_params_b": 107.77,
- "quant": "Q6_K",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 150.06,
+ "tps_mean": 244.49,
"tps_std": 1.13,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 15.33,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 147.08,
+ "tps_std": 0.98,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 15.5,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 82.35,
+ "name_params_b": 107.77,
+ "quant": "Q6_K",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q6_K",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 149.97,
+ "tps_std": 1.1,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 82.35,
@@ -3906,8 +3764,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3918,12 +3776,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 15.27,
+ "tps_mean": 15.49,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 82.35,
@@ -3931,8 +3789,8 @@
"quant": "Q6_K",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q6_K-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -3942,48 +3800,20 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": false,
- "test": "pp512",
- "tps_mean": 135.23,
- "tps_std": 0.81,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 11.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
- "quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
@@ -4014,48 +3844,20 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 135.29,
- "tps_std": 0.58,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 11.6,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
- "quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
@@ -4068,7 +3870,7 @@
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "runtime",
+ "error_type": "hang",
"backend": null,
"ngl": null,
"mmap": null,
@@ -4082,58 +3884,202 @@
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 262.13,
- "tps_std": 9.71,
+ "tps_mean": 270.35,
+ "tps_std": 3.39,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
"name_params_b": 107.77,
"quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 11.65,
- "tps_std": 0.01,
+ "tps_mean": 11.78,
+ "tps_std": 0.03,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
"name_params_b": 107.77,
"quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 292.23,
+ "tps_std": 3.13,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 11.73,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 140.27,
+ "tps_std": 0.97,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 11.74,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": true,
"test": null,
@@ -4148,14 +4094,14 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__fa1.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__fa1.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": null,
@@ -4170,14 +4116,14 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": null,
@@ -4192,7 +4138,7 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_beta__hblt0__fa1.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm6_4_3__hblt0__fa1.log",
"build": null
},
{
@@ -4203,12 +4149,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 267.45,
- "tps_std": 1.9,
+ "tps_mean": 279.13,
+ "tps_std": 2.9,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4216,8 +4162,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4228,12 +4174,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 11.6,
- "tps_std": 0.05,
+ "tps_mean": 11.79,
+ "tps_std": 0.07,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4241,8 +4187,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4253,12 +4199,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 293.37,
- "tps_std": 7.08,
+ "tps_mean": 293.6,
+ "tps_std": 3.84,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4266,8 +4212,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4278,12 +4224,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 11.54,
- "tps_std": 0.03,
+ "tps_mean": 11.62,
+ "tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4291,8 +4237,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4302,20 +4248,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 264.02,
+ "tps_std": 2.74,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 11.79,
+ "tps_std": 0.06,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
@@ -4324,20 +4298,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 279.69,
+ "tps_std": 2.3,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 11.6,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
@@ -4347,12 +4349,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 272.38,
- "tps_std": 1.28,
+ "tps_mean": 252.38,
+ "tps_std": 7.7,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4360,8 +4362,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4372,12 +4374,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 11.64,
- "tps_std": 0.01,
+ "tps_mean": 11.35,
+ "tps_std": 0.6,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4385,8 +4387,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -4418,20 +4420,48 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 271.54,
+ "tps_std": 4.1,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 11.57,
+ "tps_std": 0.58,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
@@ -4463,12 +4493,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 255.55,
- "tps_std": 1.38,
+ "tps_mean": 258.54,
+ "tps_std": 1.39,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4476,8 +4506,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4488,12 +4518,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 12.27,
+ "tps_mean": 12.45,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4501,8 +4531,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4513,12 +4543,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 259.07,
- "tps_std": 1.3,
+ "tps_mean": 262.84,
+ "tps_std": 1.39,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4526,8 +4556,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4538,12 +4568,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 12.11,
+ "tps_mean": 12.3,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4551,8 +4581,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4563,62 +4593,62 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 168.01,
+ "tps_mean": 169.23,
+ "tps_std": 0.84,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 12.45,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 106.65,
+ "name_params_b": 107.77,
+ "quant": "Q8_0",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 173.79,
"tps_std": 0.85,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
- "quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 12.3,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 106.65,
- "name_params_b": 107.77,
- "quant": "Q8_0",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-Q8_0",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 172.71,
- "tps_std": 0.91,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4626,8 +4656,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4638,12 +4668,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 12.28,
- "tps_std": 0.0,
+ "tps_mean": 12.44,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 106.65,
@@ -4651,8 +4681,8 @@
"quant": "Q8_0",
"log": "results/Llama-4-Scout-17B-16E-Instruct-Q8_0-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -4662,48 +4692,20 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": false,
- "test": "pp512",
- "tps_mean": 137.82,
- "tps_std": 0.73,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 17.41,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
@@ -4734,206 +4736,6 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 137.63,
- "tps_std": 0.8,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 17.29,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 122.98,
- "tps_std": 0.59,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 17.53,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 281.87,
- "tps_std": 1.98,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 17.59,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 233.14,
- "tps_std": 0.9,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 17.59,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -4946,42 +4748,20 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0.log",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2.log",
"build": null
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
"model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
"fa": true,
"test": null,
"tps_mean": null,
"tps_std": null,
"error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
"error_type": "runtime",
"backend": null,
"ngl": null,
@@ -4990,32 +4770,376 @@
"file_size_gib": null,
"name_params_b": 17.0,
"quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_2__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 285.51,
+ "tps_std": 1.64,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 17.7,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 308.62,
+ "tps_std": 2.62,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.54,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 137.71,
+ "tps_std": 0.62,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 17.71,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 142.62,
+ "tps_std": 0.82,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.55,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 286.37,
+ "tps_std": 1.44,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 17.7,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 234.68,
+ "tps_std": 1.31,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.71,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 17.0,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 290.54,
+ "tps_std": 1.59,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": null
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 307.08,
- "tps_std": 2.67,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5024,14 +5148,39 @@
"env": "rocm7_rc-rocwmma",
"env_base": "rocm7_rc",
"env_variant": "rocwmma",
- "fa": true,
+ "fa": false,
"test": "tg128",
- "tps_mean": 17.34,
+ "tps_mean": 17.67,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 304.99,
+ "tps_std": 0.37,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5039,8 +5188,33 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.28,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5051,8 +5225,8 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 138.22,
- "tps_std": 0.46,
+ "tps_mean": 283.93,
+ "tps_std": 1.57,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -5064,8 +5238,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5076,8 +5250,8 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 17.45,
- "tps_std": 0.09,
+ "tps_mean": 17.65,
+ "tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -5089,8 +5263,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5100,20 +5274,48 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 300.13,
+ "tps_std": 1.26,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.27,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
@@ -5123,12 +5325,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 281.24,
+ "tps_mean": 291.6,
"tps_std": 1.95,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5136,8 +5338,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5148,12 +5350,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 17.56,
+ "tps_mean": 17.73,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5161,8 +5363,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5194,20 +5396,48 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 285.56,
+ "tps_std": 1.41,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 17.72,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
@@ -5216,20 +5446,48 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 17.0,
+ "test": "pp512",
+ "tps_mean": 227.75,
+ "tps_std": 1.52,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 17.73,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
@@ -5239,12 +5497,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 218.27,
- "tps_std": 0.8,
+ "tps_mean": 216.64,
+ "tps_std": 2.76,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5252,8 +5510,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5264,12 +5522,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 20.09,
- "tps_std": 0.01,
+ "tps_mean": 20.39,
+ "tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5277,8 +5535,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5289,12 +5547,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 220.73,
- "tps_std": 0.69,
+ "tps_mean": 217.68,
+ "tps_std": 4.15,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5302,8 +5560,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5314,62 +5572,37 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 19.64,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 152.77,
- "tps_std": 0.73,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 107.77,
- "file_size_gib": 57.73,
- "name_params_b": 107.77,
- "quant": "Q4_K_XL",
- "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
- "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 20.02,
+ "tps_mean": 19.97,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 151.98,
+ "tps_std": 0.6,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5377,8 +5610,33 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002",
+ "model_clean": "Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 20.26,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 107.77,
+ "file_size_gib": 57.73,
+ "name_params_b": 107.77,
+ "quant": "Q4_K_XL",
+ "log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5389,12 +5647,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 155.24,
- "tps_std": 1.01,
+ "tps_mean": 154.96,
+ "tps_std": 0.82,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5402,8 +5660,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5414,12 +5672,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 19.99,
- "tps_std": 0.0,
+ "tps_mean": 20.28,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 107.77,
"file_size_gib": 57.73,
@@ -5427,8 +5685,8 @@
"quant": "Q4_K_XL",
"log": "results/Llama-4-Scout-17B-16E-Instruct-UD-Q4_K_XL-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5482,23 +5740,20 @@
"env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
- "test": "pp512",
- "tps_mean": 73.83,
- "tps_std": 0.16,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 235.0,
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
@@ -5506,102 +5761,221 @@
"env": "rocm6_4_2",
"env_base": "rocm6_4_2",
"env_variant": null,
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 235.0,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 130.11,
+ "tps_std": 0.68,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 13.95,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 144.31,
+ "tps_std": 0.8,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.71,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 235.0,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log",
+ "build": null
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "hang",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 235.0,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 131.78,
+ "tps_std": 1.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
"fa": false,
"test": "tg128",
"tps_mean": 13.68,
- "tps_std": 0.01,
+ "tps_std": 0.43,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
"name_params_b": 235.09,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
"model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 61.47,
- "tps_std": 0.09,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 13.83,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 235.0,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta.log",
- "build": null
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": true,
"test": null,
@@ -5616,14 +5990,14 @@
"file_size_gib": null,
"name_params_b": 235.0,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__fa1.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__fa1.log",
"build": null
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
"model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": null,
@@ -5638,14 +6012,14 @@
"file_size_gib": null,
"name_params_b": 235.0,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0.log",
"build": null
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
"model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": null,
@@ -5660,7 +6034,7 @@
"file_size_gib": null,
"name_params_b": 235.0,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_beta__hblt0__fa1.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm6_4_3__hblt0__fa1.log",
"build": null
},
{
@@ -5670,106 +6044,6 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma",
"fa": false,
- "test": "pp512",
- "tps_mean": 129.7,
- "tps_std": 0.81,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 13.66,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 145.18,
- "tps_std": 0.48,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 13.43,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -5782,70 +6056,173 @@
"file_size_gib": null,
"name_params_b": 235.0,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma.log",
"build": null
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
"model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
+ "env": "rocm7_rc-rocwmma",
"env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
+ "env_variant": "rocwmma",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 235.0,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
"test": "pp512",
- "tps_mean": 130.56,
- "tps_std": 0.46,
+ "tps_mean": 141.61,
+ "tps_std": 0.92,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
"name_params_b": 235.09,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
"model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc",
+ "env": "rocm7_rc-rocwmma",
"env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
+ "env_variant": "rocwmma",
+ "fa": true,
"test": "tg128",
- "tps_mean": 13.87,
+ "tps_mean": 13.34,
"tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 133.33,
+ "tps_std": 0.68,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 13.78,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 139.6,
+ "tps_std": 0.47,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.03,
+ "tps_std": 0.57,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 134.95,
+ "tps_std": 0.76,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -5853,8 +6230,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5863,48 +6240,70 @@
"env": "rocm7_rc",
"env_base": "rocm7_rc",
"env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 97.08,
- "tps_std": 0.34,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
+ "fa": false,
"test": "tg128",
- "tps_mean": 13.9,
- "tps_std": 0.03,
+ "tps_mean": 13.99,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
"name_params_b": 235.09,
"quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 235.0,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__fa1.log",
+ "build": null
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 135.29,
+ "tps_std": 0.51,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -5914,20 +6313,23 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 235.0,
+ "test": "tg128",
+ "tps_mean": 13.97,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__rocm7_rc__hblt0.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
@@ -5959,12 +6361,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 114.76,
- "tps_std": 0.62,
+ "tps_mean": 112.93,
+ "tps_std": 0.63,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -5972,8 +6374,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -5984,62 +6386,37 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 16.06,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 116.18,
- "tps_std": 0.67,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 235.09,
- "file_size_gib": 96.99,
- "name_params_b": 235.09,
- "quant": "Q3_K_XL",
- "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
- "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 15.9,
+ "tps_mean": 16.43,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 114.35,
+ "tps_std": 1.12,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -6047,8 +6424,33 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003",
+ "model_clean": "Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 16.27,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 235.09,
+ "file_size_gib": 96.99,
+ "name_params_b": 235.09,
+ "quant": "Q3_K_XL",
+ "log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6059,12 +6461,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 64.79,
- "tps_std": 0.39,
+ "tps_mean": 64.6,
+ "tps_std": 0.38,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -6072,8 +6474,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6084,12 +6486,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 16.61,
- "tps_std": 0.0,
+ "tps_mean": 17.03,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -6097,8 +6499,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6109,12 +6511,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 66.84,
+ "tps_mean": 66.6,
"tps_std": 0.42,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -6122,8 +6524,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6134,12 +6536,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 16.86,
+ "tps_mean": 17.28,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 235.09,
"file_size_gib": 96.99,
@@ -6147,8 +6549,8 @@
"quant": "Q3_K_XL",
"log": "results/Qwen3-235B-A22B-Instruct-2507-UD-Q3_K_XL-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6159,8 +6561,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 157.78,
- "tps_std": 2.71,
+ "tps_mean": 157.75,
+ "tps_std": 2.58,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -6172,8 +6574,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6184,7 +6586,7 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 24.56,
+ "tps_mean": 24.62,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -6197,8 +6599,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6209,8 +6611,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 161.64,
- "tps_std": 2.99,
+ "tps_mean": 161.9,
+ "tps_std": 3.05,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -6222,8 +6624,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6234,8 +6636,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 23.94,
- "tps_std": 0.0,
+ "tps_mean": 24.09,
+ "tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -6247,8 +6649,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6259,12 +6661,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 157.64,
- "tps_std": 2.49,
+ "tps_mean": 157.81,
+ "tps_std": 2.51,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6272,8 +6674,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6284,12 +6686,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 23.93,
+ "tps_mean": 24.61,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6297,8 +6699,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6309,12 +6711,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 140.32,
- "tps_std": 1.99,
+ "tps_mean": 140.24,
+ "tps_std": 1.86,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6322,8 +6724,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6334,12 +6736,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 24.32,
- "tps_std": 0.0,
+ "tps_mean": 24.46,
+ "tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6347,421 +6749,624 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Qwen3-30B-A3B-BF16-00001-of-00002",
"model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 424.74,
- "tps_std": 7.06,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.48,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 30.0,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__fa1.log",
- "build": null
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 154.45,
- "tps_std": 1.39,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.52,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 138.46,
- "tps_std": 1.64,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.29,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 425.56,
- "tps_std": 3.28,
+ "tps_mean": 438.42,
+ "tps_std": 4.14,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
"name_params_b": 30.53,
"quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "Qwen3-30B-A3B-BF16-00001-of-00002",
"model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 24.8,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 472.05,
- "tps_std": 4.59,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.12,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 153.54,
- "tps_std": 2.25,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.74,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 158.2,
- "tps_std": 2.47,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.12,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 426.72,
- "tps_std": 7.55,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
"tps_mean": 24.57,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 475.43,
+ "tps_std": 7.4,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 24.08,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 158.13,
+ "tps_std": 2.4,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.58,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 163.4,
+ "tps_std": 3.21,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 24.14,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 441.36,
+ "tps_std": 3.35,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.6,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 337.36,
+ "tps_std": 3.48,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 24.45,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 161.73,
+ "tps_std": 1.23,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.58,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 143.05,
+ "tps_std": 2.1,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 24.42,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 448.63,
+ "tps_std": 5.9,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.96,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 473.34,
+ "tps_std": 8.6,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 23.99,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 338.07,
+ "tps_std": 3.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.93,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 342.57,
+ "tps_std": 3.12,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 23.97,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 444.3,
+ "tps_std": 6.78,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6769,8 +7374,33 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
+ "model_clean": "Qwen3-30B-A3B-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 24.66,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 56.89,
+ "name_params_b": 30.53,
+ "quant": "BF16",
+ "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6803,8 +7433,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 153.89,
- "tps_std": 1.73,
+ "tps_mean": 333.42,
+ "tps_std": 6.83,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -6816,8 +7446,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6828,7 +7458,7 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 24.57,
+ "tps_mean": 24.69,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -6841,8 +7471,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -6852,48 +7482,20 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": true,
- "test": "pp512",
- "tps_mean": 137.06,
- "tps_std": 2.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
+ "test": null,
+ "tps_mean": null,
+ "tps_std": null,
+ "error": true,
+ "error_type": "runtime",
+ "backend": null,
+ "ngl": null,
+ "mmap": null,
+ "params_b": null,
+ "file_size_gib": null,
+ "name_params_b": 30.0,
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-BF16-00001-of-00002",
- "model_clean": "Qwen3-30B-A3B-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.32,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
+ "build": null
},
{
"model": "Qwen3-30B-A3B-BF16-00001-of-00002",
@@ -6903,12 +7505,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 107.55,
- "tps_std": 0.11,
+ "tps_mean": 106.47,
+ "tps_std": 0.1,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6916,8 +7518,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6928,12 +7530,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 8.09,
+ "tps_mean": 8.18,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6941,8 +7543,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6953,12 +7555,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 107.68,
- "tps_std": 0.13,
+ "tps_mean": 106.77,
+ "tps_std": 0.12,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6966,8 +7568,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -6978,12 +7580,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 8.03,
+ "tps_mean": 8.11,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -6991,8 +7593,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -7003,12 +7605,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 86.02,
+ "tps_mean": 84.71,
"tps_std": 0.11,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -7016,8 +7618,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -7028,12 +7630,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 7.46,
+ "tps_mean": 7.52,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -7041,8 +7643,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -7053,12 +7655,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 86.93,
- "tps_std": 0.15,
+ "tps_mean": 85.7,
+ "tps_std": 0.1,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -7066,8 +7668,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -7078,12 +7680,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 7.44,
+ "tps_mean": 7.52,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 56.89,
@@ -7091,8 +7693,8 @@
"quant": "BF16",
"log": "results/Qwen3-30B-A3B-BF16-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -7103,8 +7705,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 387.45,
- "tps_std": 1.17,
+ "tps_mean": 387.23,
+ "tps_std": 0.82,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -7116,8 +7718,8 @@
"quant": "Q6_K_XL",
"log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -7128,7 +7730,7 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 50.42,
+ "tps_mean": 50.64,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -7141,8 +7743,8 @@
"quant": "Q6_K_XL",
"log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -7153,258 +7755,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 411.6,
- "tps_std": 0.78,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 48.14,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 385.52,
- "tps_std": 0.67,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.06,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 300.86,
- "tps_std": 0.38,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 49.71,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 534.84,
- "tps_std": 2.48,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.21,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
"tps_mean": 411.72,
- "tps_std": 2.56,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 49.76,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 387.34,
- "tps_std": 1.49,
+ "tps_std": 1.04,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -7414,71 +7766,21 @@
"file_size_gib": 24.53,
"name_params_b": 30.53,
"quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
"model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.23,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 300.58,
- "tps_std": 1.17,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 49.78,
+ "tps_mean": 48.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -7489,242 +7791,42 @@
"file_size_gib": 24.53,
"name_params_b": 30.53,
"quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_beta__hblt0__fa1.log",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
"model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 535.44,
- "tps_std": 6.9,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.07,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 619.02,
- "tps_std": 7.73,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 47.63,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 387.98,
- "tps_std": 0.76,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.09,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 413.28,
- "tps_std": 2.05,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 47.63,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 540.14,
- "tps_std": 5.22,
+ "tps_mean": 387.86,
+ "tps_std": 1.41,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 24.53,
"name_params_b": 30.53,
"quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
"model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
"env_variant": null,
"fa": false,
"test": "tg128",
@@ -7733,7 +7835,682 @@
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 301.23,
+ "tps_std": 0.49,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 50.07,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 564.83,
+ "tps_std": 6.58,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.68,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 624.99,
+ "tps_std": 3.81,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 48.64,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 389.25,
+ "tps_std": 2.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.66,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 412.18,
+ "tps_std": 1.15,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 48.8,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 562.86,
+ "tps_std": 10.14,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.74,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 418.07,
+ "tps_std": 1.65,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 50.11,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 387.74,
+ "tps_std": 1.7,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.65,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 301.31,
+ "tps_std": 0.65,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 50.37,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 570.31,
+ "tps_std": 5.05,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.52,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 612.79,
+ "tps_std": 4.77,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 46.73,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 572.09,
+ "tps_std": 8.22,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.45,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 605.49,
+ "tps_std": 1.47,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 46.73,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 573.05,
+ "tps_std": 6.77,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 30.53,
"file_size_gib": 24.53,
@@ -7741,8 +8518,33 @@
"quant": "Q6_K_XL",
"log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.8,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -7753,1340 +8555,396 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 418.6,
- "tps_std": 2.58,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 49.63,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 386.87,
- "tps_std": 1.67,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 50.5,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 300.4,
- "tps_std": 1.44,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 49.69,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 741.97,
- "tps_std": 2.92,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 57.22,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 731.64,
- "tps_std": 2.8,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 53.53,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 396.38,
- "tps_std": 1.53,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 59.54,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 406.84,
- "tps_std": 1.62,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 58.5,
- "tps_std": 0.1,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 24.53,
- "name_params_b": 30.53,
- "quant": "Q6_K_XL",
- "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 150.37,
- "tps_std": 1.75,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.49,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 153.97,
- "tps_std": 1.9,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 23.98,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 150.06,
- "tps_std": 1.71,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 23.13,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 30.0,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm6_4_2__fa1.log",
- "build": null
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 408.29,
- "tps_std": 1.82,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.53,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 30.0,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__fa1.log",
- "build": null
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 145.29,
- "tps_std": 1.91,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.53,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 130.39,
- "tps_std": 1.57,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.31,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 414.47,
- "tps_std": 3.1,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.61,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 460.12,
- "tps_std": 5.58,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.02,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 145.43,
- "tps_std": 1.04,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.8,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 150.58,
- "tps_std": 1.93,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.13,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 413.05,
- "tps_std": 2.36,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.15,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 325.48,
- "tps_std": 1.77,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.31,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 145.83,
- "tps_std": 2.39,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.12,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 130.2,
- "tps_std": 1.39,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 24.35,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 107.16,
- "tps_std": 0.06,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 8.08,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 107.26,
- "tps_std": 0.11,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 8.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 85.88,
- "tps_std": 0.1,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 7.48,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 86.57,
- "tps_std": 0.11,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002",
- "model_clean": "Qwen3-Coder-30B-A3B-Instruct-BF16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 7.49,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 30.53,
- "file_size_gib": 56.89,
- "name_params_b": 30.53,
- "quant": "BF16",
- "log": "results/Qwen3-Coder-30B-A3B-Instruct-BF16-00001-of-00002__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 192.14,
- "tps_std": 0.71,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 10.75,
+ "tps_mean": 416.05,
"tps_std": 3.44,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 50.33,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 576.38,
+ "tps_std": 3.91,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 50.85,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 414.62,
+ "tps_std": 3.23,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 50.22,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 733.4,
+ "tps_std": 2.59,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 59.36,
+ "tps_std": 0.05,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 725.54,
+ "tps_std": 2.84,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 55.57,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 392.54,
+ "tps_std": 1.8,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 61.56,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 403.74,
+ "tps_std": 1.69,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "model_clean": "Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 60.57,
+ "tps_std": 0.08,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 30.53,
+ "file_size_gib": 24.53,
+ "name_params_b": 30.53,
+ "quant": "Q6_K_XL",
+ "log": "results/Qwen3-30B-A3B-Instruct-2507-UD-Q6_K_XL__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 222.91,
+ "tps_std": 0.21,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.03,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9097,8 +8955,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 229.77,
- "tps_std": 0.18,
+ "tps_mean": 229.15,
+ "tps_std": 0.24,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -9110,8 +8968,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9122,7 +8980,7 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 13.58,
+ "tps_mean": 13.76,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -9135,8 +8993,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9147,12 +9005,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 222.24,
- "tps_std": 0.39,
+ "tps_mean": 222.59,
+ "tps_std": 0.24,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9160,8 +9018,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9172,12 +9030,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 13.86,
+ "tps_mean": 14.03,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9185,8 +9043,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9197,12 +9055,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 201.58,
- "tps_std": 0.09,
+ "tps_mean": 197.89,
+ "tps_std": 3.4,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9210,8 +9068,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9222,12 +9080,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 13.57,
+ "tps_mean": 13.76,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9235,120 +9093,320 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 706.58,
- "tps_std": 0.96,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 13.87,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 567.65,
+ "tps_mean": 734.26,
"tps_std": 0.94,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.05,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 820.41,
+ "tps_std": 1.59,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.77,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 213.4,
+ "tps_std": 3.62,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.04,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 224.2,
+ "tps_std": 4.73,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 734.7,
+ "tps_std": 1.48,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.03,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 554.49,
+ "tps_std": 0.62,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 13.6,
+ "tps_mean": 13.78,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__fa1.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 222.31,
- "tps_std": 0.28,
+ "tps_mean": 220.22,
+ "tps_std": 1.6,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -9358,21 +9416,21 @@
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 13.88,
+ "tps_mean": 14.04,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -9383,22 +9441,22 @@
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 203.03,
- "tps_std": 0.17,
+ "tps_mean": 193.9,
+ "tps_std": 1.19,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -9408,21 +9466,21 @@
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-12b-it-UD-Q8_K_XL",
"model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 13.58,
+ "tps_mean": 13.77,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -9433,10 +9491,10 @@
"file_size_gib": 13.4,
"name_params_b": 11.77,
"quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm6_4_3__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -9447,12 +9505,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 703.1,
- "tps_std": 0.68,
+ "tps_mean": 751.04,
+ "tps_std": 1.24,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9460,8 +9518,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9472,12 +9530,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 13.83,
+ "tps_mean": 14.01,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9485,8 +9543,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9497,12 +9555,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 818.63,
- "tps_std": 0.82,
+ "tps_mean": 811.04,
+ "tps_std": 1.22,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9510,8 +9568,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9522,12 +9580,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 13.47,
+ "tps_mean": 13.45,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9535,8 +9593,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9547,8 +9605,8 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 222.39,
- "tps_std": 0.17,
+ "tps_mean": 752.99,
+ "tps_std": 1.44,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -9560,8 +9618,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9572,7 +9630,7 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 13.81,
+ "tps_mean": 14.0,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -9585,8 +9643,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9597,8 +9655,8 @@
"env_variant": "rocwmma-hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 228.56,
- "tps_std": 0.31,
+ "tps_mean": 794.9,
+ "tps_std": 1.42,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -9610,8 +9668,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9622,7 +9680,7 @@
"env_variant": "rocwmma-hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 13.51,
+ "tps_mean": 13.45,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -9635,8 +9693,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9647,12 +9705,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 706.92,
- "tps_std": 0.89,
+ "tps_mean": 752.36,
+ "tps_std": 0.48,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9660,8 +9718,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -9672,312 +9730,287 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 13.87,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 554.98,
- "tps_std": 0.46,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 13.61,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 222.26,
- "tps_std": 0.3,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 13.86,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 201.53,
- "tps_std": 0.07,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 13.59,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 675.9,
- "tps_std": 1.28,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 14.26,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 371.03,
- "tps_std": 0.33,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 12.49,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 504.61,
- "tps_std": 2.97,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 11.77,
- "file_size_gib": 13.4,
- "name_params_b": 11.77,
- "quant": "Q8_K_XL",
- "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-12b-it-UD-Q8_K_XL",
- "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
"tps_mean": 14.05,
"tps_std": 0.0,
"error": false,
"error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 569.66,
+ "tps_std": 0.6,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.78,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 750.36,
+ "tps_std": 1.88,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.05,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 559.73,
+ "tps_std": 0.51,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 13.79,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 680.44,
+ "tps_std": 0.55,
+ "error": false,
+ "error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.39,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 371.66,
+ "tps_std": 0.51,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 12.62,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 502.88,
+ "tps_std": 1.45,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -9985,8 +10018,33 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-12b-it-UD-Q8_K_XL",
+ "model_clean": "gemma-3-12b-it-UD-Q8_K_XL",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 14.21,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 11.77,
+ "file_size_gib": 13.4,
+ "name_params_b": 11.77,
+ "quant": "Q8_K_XL",
+ "log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -9997,12 +10055,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 495.37,
- "tps_std": 0.71,
+ "tps_mean": 496.33,
+ "tps_std": 1.83,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -10010,8 +10068,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10022,12 +10080,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 13.87,
+ "tps_mean": 14.02,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 11.77,
"file_size_gib": 13.4,
@@ -10035,8 +10093,8 @@
"quant": "Q8_K_XL",
"log": "results/gemma-3-12b-it-UD-Q8_K_XL__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10047,8 +10105,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 92.82,
- "tps_std": 0.46,
+ "tps_mean": 87.2,
+ "tps_std": 3.7,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -10060,8 +10118,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10072,7 +10130,7 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 4.05,
+ "tps_mean": 4.09,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -10085,8 +10143,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10097,8 +10155,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 94.62,
- "tps_std": 0.56,
+ "tps_mean": 68.87,
+ "tps_std": 14.37,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -10110,8 +10168,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10122,7 +10180,7 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 4.03,
+ "tps_mean": 4.08,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -10135,8 +10193,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10147,12 +10205,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 91.25,
- "tps_std": 0.44,
+ "tps_mean": 82.57,
+ "tps_std": 10.36,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10160,8 +10218,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10172,12 +10230,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 4.04,
+ "tps_mean": 4.09,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10185,8 +10243,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10197,12 +10255,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 84.81,
- "tps_std": 0.48,
+ "tps_mean": 74.78,
+ "tps_std": 10.12,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10210,8 +10268,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10222,12 +10280,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 4.04,
+ "tps_mean": 4.09,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10235,523 +10293,723 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gemma-3-27b-it-BF16-00001-of-00002",
"model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 395.28,
+ "tps_std": 0.22,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 3.96,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 468.37,
+ "tps_std": 1.54,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.08,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 79.42,
+ "tps_std": 0.41,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 3.97,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 89.19,
+ "tps_std": 0.53,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.06,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 405.35,
+ "tps_mean": 398.35,
+ "tps_std": 1.07,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 302.82,
+ "tps_std": 2.53,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 59.13,
+ "tps_std": 7.79,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 61.26,
+ "tps_std": 10.54,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 418.46,
+ "tps_std": 0.1,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 466.83,
+ "tps_std": 1.65,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.07,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 454.1,
+ "tps_std": 1.09,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 499.43,
+ "tps_std": 1.24,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.06,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 392.5,
+ "tps_std": 0.5,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 3.97,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 311.25,
+ "tps_std": 0.72,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 451.69,
"tps_std": 0.62,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 310.92,
- "tps_std": 0.73,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.05,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 86.8,
- "tps_std": 0.36,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.02,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 82.85,
- "tps_std": 0.49,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.03,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 404.79,
- "tps_std": 0.61,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 472.91,
- "tps_std": 1.05,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.03,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 91.08,
- "tps_std": 0.67,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.03,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 93.26,
- "tps_std": 0.55,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.03,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 368.33,
- "tps_std": 0.38,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 3.71,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 311.83,
- "tps_std": 0.31,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 27.01,
- "file_size_gib": 50.31,
- "name_params_b": 27.01,
- "quant": "BF16",
- "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-27b-it-BF16-00001-of-00002",
- "model_clean": "gemma-3-27b-it-BF16",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 80.07,
- "tps_std": 0.21,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
"ngl": 99,
"mmap": 0,
"params_b": 27.01,
@@ -10760,8 +11018,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10772,7 +11030,7 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 4.0,
+ "tps_mean": 4.09,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -10785,8 +11043,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10796,20 +11054,48 @@
"env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
+ "test": "pp512",
+ "tps_mean": 324.43,
+ "tps_std": 0.22,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-27b-it-BF16-00001-of-00002",
+ "model_clean": "gemma-3-27b-it-BF16",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 4.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 27.01,
+ "file_size_gib": 50.31,
+ "name_params_b": 27.01,
+ "quant": "BF16",
+ "log": "results/gemma-3-27b-it-BF16-00001-of-00002__rocm7_rc__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "gemma-3-27b-it-BF16-00001-of-00002",
@@ -10863,12 +11149,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 135.01,
- "tps_std": 0.28,
+ "tps_mean": 129.49,
+ "tps_std": 0.34,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10876,8 +11162,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10888,12 +11174,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 4.03,
+ "tps_mean": 4.06,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10901,8 +11187,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10913,12 +11199,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 137.76,
- "tps_std": 0.25,
+ "tps_mean": 137.67,
+ "tps_std": 1.25,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10926,8 +11212,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10938,12 +11224,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 4.03,
+ "tps_mean": 4.06,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 27.01,
"file_size_gib": 50.31,
@@ -10951,8 +11237,8 @@
"quant": "BF16",
"log": "results/gemma-3-27b-it-BF16-00001-of-00002__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -10963,8 +11249,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 727.59,
- "tps_std": 1.45,
+ "tps_mean": 728.7,
+ "tps_std": 1.28,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -10976,8 +11262,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -10988,7 +11274,7 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 76.22,
+ "tps_mean": 76.63,
"tps_std": 0.03,
"error": false,
"error_type": null,
@@ -11001,8 +11287,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11013,8 +11299,8 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 750.3,
- "tps_std": 1.03,
+ "tps_mean": 752.52,
+ "tps_std": 0.83,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11026,8 +11312,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11038,7 +11324,7 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 69.96,
+ "tps_mean": 70.93,
"tps_std": 0.02,
"error": false,
"error_type": null,
@@ -11051,8 +11337,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11063,12 +11349,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 728.24,
- "tps_std": 0.55,
+ "tps_mean": 729.33,
+ "tps_std": 1.93,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 3.88,
"file_size_gib": 1.8,
@@ -11076,8 +11362,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11088,12 +11374,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 75.89,
+ "tps_mean": 76.79,
"tps_std": 0.03,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 3.88,
"file_size_gib": 1.8,
@@ -11101,8 +11387,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11113,12 +11399,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 643.29,
- "tps_std": 0.97,
+ "tps_mean": 645.25,
+ "tps_std": 0.89,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 3.88,
"file_size_gib": 1.8,
@@ -11126,8 +11412,8 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -11138,12 +11424,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 69.53,
+ "tps_mean": 70.31,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 3.88,
"file_size_gib": 1.8,
@@ -11151,120 +11437,20 @@
"quant": "Q3_K_S",
"log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 1812.73,
- "tps_std": 7.38,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 76.55,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1548.2,
- "tps_std": 4.48,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 69.64,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 729.03,
- "tps_std": 0.75,
+ "tps_mean": 2033.46,
+ "tps_std": 5.16,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11274,22 +11460,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 76.59,
- "tps_std": 0.03,
+ "tps_mean": 76.47,
+ "tps_std": 0.26,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11299,22 +11485,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 651.26,
- "tps_std": 1.22,
+ "tps_mean": 2276.86,
+ "tps_std": 9.6,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11324,22 +11510,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 69.44,
- "tps_std": 0.01,
+ "tps_mean": 70.76,
+ "tps_std": 0.26,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11349,122 +11535,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 1799.45,
- "tps_std": 7.32,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 75.43,
- "tps_std": 0.03,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 2267.56,
- "tps_std": 6.61,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 68.27,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 729.58,
- "tps_std": 0.87,
+ "tps_mean": 727.18,
+ "tps_std": 2.22,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11474,22 +11560,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 75.48,
- "tps_std": 0.02,
+ "tps_mean": 75.65,
+ "tps_std": 0.74,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11499,21 +11585,96 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 750.44,
+ "tps_mean": 740.27,
+ "tps_std": 10.38,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 70.76,
+ "tps_std": 0.11,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 2035.38,
+ "tps_std": 4.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 75.4,
"tps_std": 0.8,
"error": false,
"error_type": null,
@@ -11524,22 +11685,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
"fa": true,
- "test": "tg128",
- "tps_mean": 68.27,
- "tps_std": 0.01,
+ "test": "pp512",
+ "tps_mean": 1515.55,
+ "tps_std": 8.1,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11549,122 +11710,47 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 1812.27,
- "tps_std": 4.63,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 76.22,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1510.06,
- "tps_std": 4.96,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 69.58,
- "tps_std": 0.02,
+ "tps_mean": 70.2,
+ "tps_std": 0.39,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 3.88,
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 729.81,
- "tps_std": 1.15,
+ "tps_mean": 714.75,
+ "tps_std": 27.98,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11674,22 +11760,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 76.03,
- "tps_std": 0.04,
+ "tps_mean": 66.1,
+ "tps_std": 5.25,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11699,22 +11785,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 645.48,
- "tps_std": 1.4,
+ "tps_mean": 596.86,
+ "tps_std": 37.66,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11724,22 +11810,22 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 69.67,
- "tps_std": 0.02,
+ "tps_mean": 58.75,
+ "tps_std": 3.09,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -11749,694 +11835,622 @@
"file_size_gib": 1.8,
"name_params_b": 3.88,
"quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm6_4_3__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gemma-3-4b-it-Q3_K_S",
"model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 1628.18,
- "tps_std": 1.73,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 84.23,
- "tps_std": 0.15,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 947.36,
- "tps_std": 1.47,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 60.35,
- "tps_std": 0.15,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 1529.98,
- "tps_std": 0.8,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 86.95,
- "tps_std": 0.31,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1498.81,
- "tps_std": 1.7,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gemma-3-4b-it-Q3_K_S",
- "model_clean": "gemma-3-4b-it-Q3_K_S",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 81.29,
- "tps_std": 0.12,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 3.88,
- "file_size_gib": 1.8,
- "name_params_b": 3.88,
- "quant": "Q3_K_S",
- "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 353.66,
- "tps_std": 0.64,
+ "tps_mean": 2014.6,
+ "tps_std": 24.35,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 33.65,
- "tps_std": 0.0,
+ "tps_mean": 59.16,
+ "tps_std": 3.76,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
"env_variant": "rocwmma",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log",
- "build": null
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
"test": "pp512",
- "tps_mean": 352.4,
- "tps_std": 1.12,
+ "tps_mean": 2191.77,
+ "tps_std": 78.21,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 31.99,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 321.54,
- "tps_std": 0.46,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 33.03,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 604.24,
- "tps_std": 4.34,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 33.69,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 548.27,
+ "tps_mean": 54.32,
"tps_std": 2.65,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1991.71,
+ "tps_std": 2.91,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 56.37,
+ "tps_std": 3.4,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 2096.22,
+ "tps_std": 4.59,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 64.88,
+ "tps_std": 0.05,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 2027.41,
+ "tps_std": 4.62,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 77.12,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1550.55,
+ "tps_std": 4.52,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 33.07,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 355.23,
- "tps_std": 1.71,
+ "tps_mean": 70.54,
+ "tps_std": 0.06,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": false,
- "test": "tg128",
- "tps_mean": 33.66,
- "tps_std": 0.0,
+ "test": "pp512",
+ "tps_mean": 1992.48,
+ "tps_std": 7.34,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
"env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 323.79,
- "tps_std": 0.87,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 77.05,
+ "tps_std": 0.03,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
"env_variant": "hblt0",
"fa": true,
- "test": "tg128",
- "tps_mean": 33.04,
- "tps_std": 0.0,
+ "test": "pp512",
+ "tps_mean": 1474.15,
+ "tps_std": 1.44,
"error": false,
"error_type": null,
"backend": "ROCm",
"ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_beta__hblt0__fa1.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma",
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "rocm7_rc-hblt0",
"env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 592.27,
- "tps_std": 5.61,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 33.68,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 735.02,
- "tps_std": 5.32,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
+ "env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 33.34,
+ "tps_mean": 70.44,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log",
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1593.62,
+ "tps_std": 2.9,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 85.26,
+ "tps_std": 0.26,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 936.52,
+ "tps_std": 2.35,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 60.89,
+ "tps_std": 0.1,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1515.05,
+ "tps_std": 2.98,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 87.54,
+ "tps_std": 0.18,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1476.16,
+ "tps_std": 5.12,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gemma-3-4b-it-Q3_K_S",
+ "model_clean": "gemma-3-4b-it-Q3_K_S",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 82.48,
+ "tps_std": 0.36,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 3.88,
+ "file_size_gib": 1.8,
+ "name_params_b": 3.88,
+ "quant": "Q3_K_S",
+ "log": "results/gemma-3-4b-it-Q3_K_S__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-F16",
"model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 353.49,
- "tps_std": 1.71,
+ "tps_mean": 355.59,
+ "tps_std": 0.86,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12446,21 +12460,171 @@
"file_size_gib": 60.87,
"name_params_b": 116.83,
"quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-120b-F16",
"model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 33.63,
+ "tps_mean": 33.97,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 390.43,
+ "tps_std": 0.7,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.81,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 355.94,
+ "tps_std": 1.35,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.97,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 322.57,
+ "tps_std": 0.31,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.3,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -12471,22 +12635,22 @@
"file_size_gib": 60.87,
"name_params_b": 116.83,
"quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_2__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-120b-F16",
"model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
"test": "pp512",
- "tps_mean": 388.5,
- "tps_std": 1.06,
+ "tps_mean": 622.16,
+ "tps_std": 6.71,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12496,18 +12660,268 @@
"file_size_gib": 60.87,
"name_params_b": 116.83,
"quant": "F16",
- "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-F16",
"model_clean": "gpt-oss-120b-F16",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.91,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 743.09,
+ "tps_std": 4.89,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.76,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 354.98,
+ "tps_std": 0.72,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.86,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 390.67,
+ "tps_std": 0.97,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.79,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 617.0,
+ "tps_std": 4.97,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.9,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 543.39,
+ "tps_std": 5.51,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
"fa": true,
"test": "tg128",
"tps_mean": 33.28,
@@ -12521,10 +12935,310 @@
"file_size_gib": 60.87,
"name_params_b": 116.83,
"quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 354.18,
+ "tps_std": 0.29,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.88,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 322.46,
+ "tps_std": 0.46,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.33,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 643.61,
+ "tps_std": 7.14,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.91,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 736.33,
+ "tps_std": 3.33,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.74,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 651.63,
+ "tps_std": 3.08,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.88,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 738.84,
+ "tps_std": 9.12,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.79,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12535,12 +13249,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 598.68,
- "tps_std": 9.32,
+ "tps_mean": 649.28,
+ "tps_std": 0.87,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12548,8 +13262,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12560,12 +13274,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 33.75,
+ "tps_mean": 33.99,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12573,8 +13287,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12585,12 +13299,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 546.3,
- "tps_std": 3.37,
+ "tps_mean": 550.01,
+ "tps_std": 3.85,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12598,8 +13312,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12610,12 +13324,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 33.04,
+ "tps_mean": 33.38,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12623,8 +13337,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12635,8 +13349,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 354.34,
- "tps_std": 0.67,
+ "tps_mean": 659.79,
+ "tps_std": 3.13,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12648,8 +13362,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12660,7 +13374,7 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 33.76,
+ "tps_mean": 34.01,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -12673,8 +13387,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12685,8 +13399,8 @@
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 324.26,
- "tps_std": 0.8,
+ "tps_mean": 553.65,
+ "tps_std": 2.4,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12698,8 +13412,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12710,7 +13424,7 @@
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 33.05,
+ "tps_mean": 33.31,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -12723,8 +13437,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12735,12 +13449,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 450.26,
- "tps_std": 1.46,
+ "tps_mean": 449.86,
+ "tps_std": 1.68,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12748,8 +13462,8 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -12760,162 +13474,137 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 33.56,
- "tps_std": 0.03,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 499.8,
- "tps_std": 1.95,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 33.18,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 230.22,
- "tps_std": 0.76,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 33.16,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 243.2,
- "tps_std": 1.11,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 60.87,
- "name_params_b": 116.83,
- "quant": "F16",
- "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-120b-F16",
- "model_clean": "gpt-oss-120b-F16",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 33.15,
+ "tps_mean": 34.19,
"tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 496.21,
+ "tps_std": 1.71,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.64,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 230.09,
+ "tps_std": 0.83,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 33.57,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 243.96,
+ "tps_std": 0.96,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 60.87,
@@ -12923,8 +13612,33 @@
"quant": "F16",
"log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-F16",
+ "model_clean": "gpt-oss-120b-F16",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 33.79,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 60.87,
+ "name_params_b": 116.83,
+ "quant": "F16",
+ "log": "results/gpt-oss-120b-F16__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -12935,8 +13649,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 352.37,
- "tps_std": 0.72,
+ "tps_mean": 353.2,
+ "tps_std": 0.3,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12948,8 +13662,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12960,8 +13674,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 45.11,
- "tps_std": 0.02,
+ "tps_mean": 45.42,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -12973,8 +13687,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -12984,20 +13698,48 @@
"env_base": "rocm6_4_2",
"env_variant": "rocwmma",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
+ "test": "pp512",
+ "tps_mean": 387.1,
+ "tps_std": 0.42,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log",
- "build": null
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 45.16,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
@@ -13029,12 +13771,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 319.23,
- "tps_std": 0.62,
+ "tps_mean": 319.84,
+ "tps_std": 0.73,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13042,8 +13784,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13054,12 +13796,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 43.79,
+ "tps_mean": 44.43,
"tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13067,117 +13809,167 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 589.45,
- "tps_std": 4.75,
+ "tps_mean": 606.86,
+ "tps_std": 5.18,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 45.0,
- "tps_std": 0.0,
+ "tps_mean": 45.26,
+ "tps_std": 0.02,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 539.93,
- "tps_std": 1.23,
+ "tps_mean": 732.72,
+ "tps_std": 4.06,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 44.01,
- "tps_std": 0.0,
+ "tps_mean": 45.14,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__fa1.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
"fa": false,
+ "test": "pp512",
+ "tps_mean": 351.42,
+ "tps_std": 1.56,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 45.39,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
"test": null,
"tps_mean": null,
"tps_std": null,
@@ -13190,19 +13982,19 @@
"file_size_gib": null,
"name_params_b": null,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3-rocwmma__hblt0__fa1.log",
"build": null
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
"test": "pp512",
- "tps_mean": 323.04,
- "tps_std": 0.94,
+ "tps_mean": 608.2,
+ "tps_std": 7.04,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -13212,21 +14004,21 @@
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
"test": "tg128",
- "tps_mean": 44.01,
+ "tps_mean": 45.4,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -13237,10 +14029,160 @@
"file_size_gib": 59.02,
"name_params_b": 116.83,
"quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 533.95,
+ "tps_std": 3.58,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 44.41,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 352.53,
+ "tps_std": 0.81,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 45.41,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 320.78,
+ "tps_std": 0.96,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 44.49,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13251,12 +14193,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 586.82,
- "tps_std": 5.23,
+ "tps_mean": 635.84,
+ "tps_std": 5.72,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13264,8 +14206,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13276,12 +14218,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 44.72,
- "tps_std": 0.3,
+ "tps_mean": 45.26,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13289,8 +14231,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13301,12 +14243,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "pp512",
- "tps_mean": 684.17,
- "tps_std": 67.05,
+ "tps_mean": 708.36,
+ "tps_std": 12.96,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13314,8 +14256,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13326,12 +14268,12 @@
"env_variant": "rocwmma",
"fa": true,
"test": "tg128",
- "tps_mean": 44.14,
- "tps_std": 0.27,
+ "tps_mean": 44.85,
+ "tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13339,8 +14281,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13351,8 +14293,8 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 350.89,
- "tps_std": 1.88,
+ "tps_mean": 650.68,
+ "tps_std": 9.08,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -13364,8 +14306,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13376,7 +14318,7 @@
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 44.93,
+ "tps_mean": 45.26,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -13389,8 +14331,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13400,110 +14342,113 @@
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
"fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": null,
+ "test": "pp512",
+ "tps_mean": 734.35,
+ "tps_std": 10.26,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "gpt-oss-120b-mxfp4-00001-of-00003",
- "model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 589.82,
- "tps_std": 5.12,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 59.02,
- "name_params_b": 116.83,
- "quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-120b-mxfp4-00001-of-00003",
"model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_rc",
+ "env": "rocm7_rc-rocwmma-hblt0",
"env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 45.12,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 59.02,
- "name_params_b": 116.83,
- "quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-mxfp4-00001-of-00003",
- "model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 540.27,
- "tps_std": 2.82,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 116.83,
- "file_size_gib": 59.02,
- "name_params_b": 116.83,
- "quant": "MXFP4",
- "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-120b-mxfp4-00001-of-00003",
- "model_clean": "gpt-oss-120b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
+ "env_variant": "rocwmma-hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 43.89,
+ "tps_mean": 44.85,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 646.07,
+ "tps_std": 6.86,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 45.5,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 541.57,
+ "tps_std": 3.26,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13511,8 +14456,33 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-120b-mxfp4-00001-of-00003",
+ "model_clean": "gpt-oss-120b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 44.31,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 116.83,
+ "file_size_gib": 59.02,
+ "name_params_b": 116.83,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13523,8 +14493,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 354.6,
- "tps_std": 1.2,
+ "tps_mean": 657.58,
+ "tps_std": 3.78,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -13536,8 +14506,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13548,7 +14518,7 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 45.04,
+ "tps_mean": 45.56,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -13561,8 +14531,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13573,8 +14543,8 @@
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 319.46,
- "tps_std": 0.48,
+ "tps_mean": 550.79,
+ "tps_std": 2.99,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -13586,8 +14556,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13598,7 +14568,7 @@
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 43.9,
+ "tps_mean": 44.41,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -13611,8 +14581,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13623,12 +14593,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 488.47,
- "tps_std": 2.3,
+ "tps_mean": 485.54,
+ "tps_std": 2.45,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13636,8 +14606,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13648,12 +14618,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 48.21,
- "tps_std": 0.02,
+ "tps_mean": 49.29,
+ "tps_std": 0.03,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13661,8 +14631,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13673,12 +14643,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 547.53,
- "tps_std": 3.03,
+ "tps_mean": 540.81,
+ "tps_std": 2.56,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13686,8 +14656,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13698,12 +14668,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 47.49,
- "tps_std": 0.08,
+ "tps_mean": 48.25,
+ "tps_std": 0.03,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13711,8 +14681,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13723,12 +14693,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 239.44,
- "tps_std": 1.23,
+ "tps_mean": 239.24,
+ "tps_std": 1.27,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13736,8 +14706,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13748,12 +14718,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 49.15,
- "tps_std": 0.02,
+ "tps_mean": 50.39,
+ "tps_std": 0.05,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13761,8 +14731,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13773,12 +14743,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 255.37,
- "tps_std": 1.68,
+ "tps_mean": 255.5,
+ "tps_std": 1.49,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13786,8 +14756,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13798,12 +14768,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 49.31,
- "tps_std": 0.08,
+ "tps_mean": 50.41,
+ "tps_std": 0.04,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 116.83,
"file_size_gib": 59.02,
@@ -13811,8 +14781,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-120b-mxfp4-00001-of-00003__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -13823,8 +14793,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 324.31,
- "tps_std": 4.5,
+ "tps_mean": 324.3,
+ "tps_std": 4.23,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -13836,8 +14806,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -13848,7 +14818,257 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 26.87,
+ "tps_mean": 27.1,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 342.14,
+ "tps_std": 4.83,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 27.05,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 324.36,
+ "tps_std": 4.35,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.12,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 304.23,
+ "tps_std": 3.73,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 26.85,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1198.51,
+ "tps_std": 10.39,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.14,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1506.44,
+ "tps_std": 7.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 27.1,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -13859,472 +15079,22 @@
"file_size_gib": 38.97,
"name_params_b": 20.91,
"quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma.log",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-20b-F32",
"model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 343.3,
- "tps_std": 5.27,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 26.76,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 322.55,
- "tps_std": 4.18,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 24.9,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 304.86,
- "tps_std": 3.77,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 26.58,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 1135.9,
- "tps_std": 9.1,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 26.88,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1011.32,
- "tps_std": 4.33,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 26.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 313.05,
- "tps_std": 6.96,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 26.86,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 301.3,
- "tps_std": 4.81,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 26.65,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 1130.14,
- "tps_std": 7.45,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 26.84,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 1502.62,
- "tps_std": 12.84,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 26.67,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 319.92,
- "tps_std": 6.39,
+ "tps_mean": 326.8,
+ "tps_std": 4.56,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -14334,20 +15104,170 @@
"file_size_gib": 38.97,
"name_params_b": 20.91,
"quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-20b-F32",
"model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
"env_variant": "rocwmma-hblt0",
"fa": false,
"test": "tg128",
+ "tps_mean": 27.13,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 350.18,
+ "tps_std": 5.1,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 27.09,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1185.57,
+ "tps_std": 6.55,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.12,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1000.77,
+ "tps_std": 2.37,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
"tps_mean": 26.83,
"tps_std": 0.0,
"error": false,
@@ -14359,10 +15279,260 @@
"file_size_gib": 38.97,
"name_params_b": 20.91,
"quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 322.0,
+ "tps_std": 4.37,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.14,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 303.26,
+ "tps_std": 4.84,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 26.9,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1256.75,
+ "tps_std": 10.54,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.11,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1481.17,
+ "tps_std": 9.67,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 27.03,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1202.19,
+ "tps_std": 5.53,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 27.1,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14373,8 +15543,8 @@
"env_variant": "rocwmma-hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 338.36,
- "tps_std": 5.02,
+ "tps_mean": 1422.9,
+ "tps_std": 11.48,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -14386,8 +15556,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14398,7 +15568,7 @@
"env_variant": "rocwmma-hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 26.71,
+ "tps_mean": 27.04,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -14411,8 +15581,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14423,12 +15593,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 1130.86,
- "tps_std": 14.88,
+ "tps_mean": 1253.01,
+ "tps_std": 23.2,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14436,8 +15606,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14448,37 +15618,62 @@
"env_variant": null,
"fa": false,
"test": "tg128",
+ "tps_mean": 27.11,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1005.24,
+ "tps_std": 32.45,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
"tps_mean": 26.89,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1007.82,
- "tps_std": 22.14,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14486,48 +15681,48 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-20b-F32",
"model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc",
+ "env": "rocm7_rc-hblt0",
"env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1220.02,
+ "tps_std": 12.3,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 38.97,
+ "name_params_b": 20.91,
+ "quant": "F32",
+ "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-F32",
+ "model_clean": "gpt-oss-20b-F32",
+ "env": "rocm7_rc-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "hblt0",
+ "fa": false,
"test": "tg128",
- "tps_mean": 26.66,
+ "tps_mean": 27.17,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 321.8,
- "tps_std": 6.18,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
"ngl": 99,
"mmap": 0,
"params_b": 20.91,
@@ -14536,33 +15731,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-F32",
- "model_clean": "gpt-oss-20b-F32",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 26.83,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 38.97,
- "name_params_b": 20.91,
- "quant": "F32",
- "log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14573,8 +15743,8 @@
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 302.84,
- "tps_std": 5.01,
+ "tps_mean": 985.58,
+ "tps_std": 10.64,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -14586,8 +15756,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14598,7 +15768,7 @@
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 26.61,
+ "tps_mean": 26.88,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -14611,8 +15781,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14623,12 +15793,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 369.6,
- "tps_std": 1.3,
+ "tps_mean": 367.61,
+ "tps_std": 1.9,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14636,8 +15806,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14648,12 +15818,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 8.72,
+ "tps_mean": 8.69,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14661,8 +15831,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14673,12 +15843,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 389.96,
- "tps_std": 1.87,
+ "tps_mean": 386.12,
+ "tps_std": 1.98,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14686,8 +15856,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14698,12 +15868,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 8.7,
+ "tps_mean": 8.66,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14711,8 +15881,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_amdvlk__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14723,12 +15893,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 318.04,
- "tps_std": 1.5,
+ "tps_mean": 315.56,
+ "tps_std": 1.4,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14736,8 +15906,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14748,12 +15918,12 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 7.89,
+ "tps_mean": 7.86,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14761,8 +15931,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_radv.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14773,12 +15943,12 @@
"env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 334.64,
- "tps_std": 1.46,
+ "tps_mean": 333.31,
+ "tps_std": 1.47,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14786,8 +15956,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14798,12 +15968,12 @@
"env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 7.9,
+ "tps_mean": 7.92,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 38.97,
@@ -14811,8 +15981,8 @@
"quant": "F32",
"log": "results/gpt-oss-20b-F32__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -14823,8 +15993,8 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 581.92,
- "tps_std": 2.0,
+ "tps_mean": 582.6,
+ "tps_std": 4.9,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -14836,8 +16006,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -14848,7 +16018,107 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 64.34,
+ "tps_mean": 64.91,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 644.05,
+ "tps_std": 3.87,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_2-rocwmma",
+ "env_base": "rocm6_4_2",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 64.63,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 581.11,
+ "tps_std": 2.96,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_2",
+ "env_base": "rocm6_4_2",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 65.0,
"tps_std": 0.02,
"error": false,
"error_type": null,
@@ -14859,22 +16129,22 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2-rocwmma",
+ "env": "rocm6_4_2",
"env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
+ "env_variant": null,
"fa": true,
"test": "pp512",
- "tps_mean": 642.4,
- "tps_std": 3.59,
+ "tps_mean": 522.29,
+ "tps_std": 2.36,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -14884,21 +16154,21 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2-rocwmma",
+ "env": "rocm6_4_2",
"env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
+ "env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 63.74,
+ "tps_mean": 63.63,
"tps_std": 0.0,
"error": false,
"error_type": null,
@@ -14909,222 +16179,22 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2-rocwmma__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 582.94,
- "tps_std": 2.35,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 64.35,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 522.14,
- "tps_std": 1.92,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 62.97,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 1128.54,
- "tps_std": 2.4,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 64.39,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1005.66,
- "tps_std": 1.52,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 63.07,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 585.03,
- "tps_std": 1.84,
+ "tps_mean": 1184.03,
+ "tps_std": 8.37,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -15134,21 +16204,171 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 64.36,
+ "tps_mean": 65.07,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1480.28,
+ "tps_std": 9.38,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 64.45,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 584.04,
+ "tps_std": 2.52,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 64.87,
+ "tps_std": 0.02,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 643.25,
+ "tps_std": 3.86,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-rocwmma-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 64.67,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -15159,22 +16379,22 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3-rocwmma__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
"test": "pp512",
- "tps_mean": 528.92,
- "tps_std": 2.02,
+ "tps_mean": 1171.02,
+ "tps_std": 7.04,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -15184,21 +16404,71 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 64.94,
+ "tps_std": 0.04,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 996.31,
+ "tps_std": 6.53,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3",
+ "env_base": "rocm6_4_3",
+ "env_variant": null,
"fa": true,
"test": "tg128",
- "tps_mean": 63.0,
+ "tps_mean": 63.68,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -15209,10 +16479,110 @@
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_beta__hblt0__fa1.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 582.51,
+ "tps_std": 2.41,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 64.89,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 522.63,
+ "tps_std": 1.74,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm6_4_3-hblt0",
+ "env_base": "rocm6_4_3",
+ "env_variant": "hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 63.66,
+ "tps_std": 0.03,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm6_4_3__hblt0__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -15223,12 +16593,12 @@
"env_variant": "rocwmma",
"fa": false,
"test": "pp512",
- "tps_mean": 1124.54,
- "tps_std": 9.14,
+ "tps_mean": 1236.64,
+ "tps_std": 11.2,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 11.27,
@@ -15236,8 +16606,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15248,108 +16618,83 @@
"env_variant": "rocwmma",
"fa": false,
"test": "tg128",
- "tps_mean": 64.19,
+ "tps_mean": 64.78,
"tps_std": 0.01,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 1474.7,
- "tps_std": 11.5,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 63.31,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 583.69,
- "tps_std": 2.09,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
"ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 11.27,
"name_params_b": 20.91,
"quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
"model": "gpt-oss-20b-mxfp4",
"model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc-rocwmma-hblt0",
+ "env": "rocm7_rc-rocwmma",
"env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
+ "env_variant": "rocwmma",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1460.58,
+ "tps_std": 11.92,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc-rocwmma",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma",
+ "fa": true,
"test": "tg128",
"tps_mean": 64.26,
- "tps_std": 0.01,
+ "tps_std": 0.0,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1299.34,
+ "tps_std": 7.77,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -15361,8 +16706,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15371,139 +16716,139 @@
"env": "rocm7_rc-rocwmma-hblt0",
"env_base": "rocm7_rc",
"env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 642.92,
- "tps_std": 1.97,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 63.28,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 1125.6,
- "tps_std": 1.9,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 64.35,
- "tps_std": 0.01,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 997.74,
- "tps_std": 8.16,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 63.0,
+ "tps_mean": 64.85,
"tps_std": 0.0,
"error": false,
"error_type": null,
"backend": "ROCm",
- "ngl": 999,
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1516.33,
+ "tps_std": 21.51,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc-rocwmma-hblt0",
+ "env_base": "rocm7_rc",
+ "env_variant": "rocwmma-hblt0",
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 64.4,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc-rocwmma__hblt0__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 1246.14,
+ "tps_std": 8.32,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 65.15,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1010.38,
+ "tps_std": 6.35,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 11.27,
@@ -15511,8 +16856,33 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "rocm7_rc",
+ "env_base": "rocm7_rc",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 63.49,
+ "tps_std": 0.01,
+ "error": false,
+ "error_type": null,
+ "backend": "ROCm",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__rocm7_rc__fa1.log",
+ "build": {
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15523,8 +16893,8 @@
"env_variant": "hblt0",
"fa": false,
"test": "pp512",
- "tps_mean": 584.02,
- "tps_std": 1.44,
+ "tps_mean": 1303.74,
+ "tps_std": 6.94,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -15536,8 +16906,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15548,7 +16918,7 @@
"env_variant": "hblt0",
"fa": false,
"test": "tg128",
- "tps_mean": 64.5,
+ "tps_mean": 65.1,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -15561,8 +16931,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15573,8 +16943,8 @@
"env_variant": "hblt0",
"fa": true,
"test": "pp512",
- "tps_mean": 525.48,
- "tps_std": 1.39,
+ "tps_mean": 1037.92,
+ "tps_std": 11.67,
"error": false,
"error_type": null,
"backend": "ROCm",
@@ -15586,8 +16956,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15598,7 +16968,7 @@
"env_variant": "hblt0",
"fa": true,
"test": "tg128",
- "tps_mean": 63.04,
+ "tps_mean": 63.63,
"tps_std": 0.01,
"error": false,
"error_type": null,
@@ -15611,8 +16981,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__rocm7_rc__hblt0__fa1.log",
"build": {
- "hash": "79c1160b",
- "number": "6123"
+ "hash": "de219279",
+ "number": "6181"
}
},
{
@@ -15623,12 +16993,12 @@
"env_variant": null,
"fa": false,
"test": "pp512",
- "tps_mean": 1218.18,
- "tps_std": 8.08,
+ "tps_mean": 1220.69,
+ "tps_std": 8.95,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 11.27,
@@ -15636,8 +17006,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
},
{
@@ -15648,137 +17018,162 @@
"env_variant": null,
"fa": false,
"test": "tg128",
- "tps_mean": 69.76,
+ "tps_mean": 71.42,
+ "tps_std": 0.2,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 1467.61,
+ "tps_std": 12.7,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_amdvlk",
+ "env_base": "vulkan_amdvlk",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 69.47,
+ "tps_std": 0.09,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "pp512",
+ "tps_mean": 651.21,
+ "tps_std": 5.24,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": false,
+ "test": "tg128",
+ "tps_mean": 72.35,
+ "tps_std": 0.08,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "pp512",
+ "tps_mean": 732.35,
+ "tps_std": 7.51,
+ "error": false,
+ "error_type": null,
+ "backend": "Vulkan",
+ "ngl": 99,
+ "mmap": 0,
+ "params_b": 20.91,
+ "file_size_gib": 11.27,
+ "name_params_b": 20.91,
+ "quant": "MXFP4",
+ "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log",
+ "build": {
+ "hash": "1fe00296",
+ "number": "6182"
+ }
+ },
+ {
+ "model": "gpt-oss-20b-mxfp4",
+ "model_clean": "gpt-oss-20b-mxfp4",
+ "env": "vulkan_radv",
+ "env_base": "vulkan_radv",
+ "env_variant": null,
+ "fa": true,
+ "test": "tg128",
+ "tps_mean": 72.05,
"tps_std": 0.07,
"error": false,
"error_type": null,
"backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 1482.59,
- "tps_std": 12.76,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 68.63,
- "tps_std": 0.11,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 649.86,
- "tps_std": 5.16,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 70.72,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 728.71,
- "tps_std": 8.4,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
+ "ngl": 99,
"mmap": 0,
"params_b": 20.91,
"file_size_gib": 11.27,
@@ -15786,893 +17181,8 @@
"quant": "MXFP4",
"log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log",
"build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "gpt-oss-20b-mxfp4",
- "model_clean": "gpt-oss-20b-mxfp4",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 70.49,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 20.91,
- "file_size_gib": 11.27,
- "name_params_b": 20.91,
- "quant": "MXFP4",
- "log": "results/gpt-oss-20b-mxfp4__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.6,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma.log",
- "build": null
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2-rocwmma",
- "env_base": "rocm6_4_2",
- "env_variant": "rocwmma",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.6,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2-rocwmma__fa1.log",
- "build": null
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 33.76,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.48,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 31.69,
- "tps_std": 0.04,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm6_4_2",
- "env_base": "rocm6_4_2",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm6_4_2__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 99.09,
- "tps_std": 0.1,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.61,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 81.54,
- "tps_std": 0.11,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta",
- "env_base": "rocm7_beta",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.63,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "hang",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.6,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0.log",
- "build": null
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 31.63,
- "tps_std": 0.02,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_beta-hblt0",
- "env_base": "rocm7_beta",
- "env_variant": "hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_beta__hblt0__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "pp512",
- "tps_mean": 99.41,
- "tps_std": 0.11,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "pp512",
- "tps_mean": 106.7,
- "tps_std": 0.12,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.6,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "pp512",
- "tps_mean": 33.87,
- "tps_std": 0.08,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.61,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "pp512",
- "tps_mean": 34.48,
- "tps_std": 0.05,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-rocwmma-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "rocwmma-hblt0",
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.61,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 99,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc-rocwmma__hblt0__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 99.16,
- "tps_std": 0.09,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 4.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 81.56,
- "tps_std": 0.09,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc",
- "env_base": "rocm7_rc",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 4.62,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "ROCm",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__fa1.log",
- "build": {
- "hash": "79c1160b",
- "number": "6123"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": false,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.6,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0.log",
- "build": null
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "rocm7_rc-hblt0",
- "env_base": "rocm7_rc",
- "env_variant": "hblt0",
- "fa": true,
- "test": null,
- "tps_mean": null,
- "tps_std": null,
- "error": true,
- "error_type": "runtime",
- "backend": null,
- "ngl": null,
- "mmap": null,
- "params_b": null,
- "file_size_gib": null,
- "name_params_b": 70.6,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__rocm7_rc__hblt0__fa1.log",
- "build": null
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 72.73,
- "tps_std": 0.05,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 5.08,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 73.47,
- "tps_std": 0.03,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_amdvlk",
- "env_base": "vulkan_amdvlk",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 5.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_amdvlk__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "pp512",
- "tps_mean": 78.79,
- "tps_std": 0.21,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": false,
- "test": "tg128",
- "tps_mean": 5.04,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "pp512",
- "tps_mean": 80.58,
- "tps_std": 0.13,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
- }
- },
- {
- "model": "llama3.3-70.6B-Q4_K_M",
- "model_clean": "llama3.3-70.6B-Q4_K_M",
- "env": "vulkan_radv",
- "env_base": "vulkan_radv",
- "env_variant": null,
- "fa": true,
- "test": "tg128",
- "tps_mean": 5.03,
- "tps_std": 0.0,
- "error": false,
- "error_type": null,
- "backend": "Vulkan",
- "ngl": 999,
- "mmap": 0,
- "params_b": 70.55,
- "file_size_gib": 39.59,
- "name_params_b": 70.55,
- "quant": "Q4_K_M",
- "log": "results/llama3.3-70.6B-Q4_K_M__vulkan_radv__fa1.log",
- "build": {
- "hash": "34c9d765",
- "number": "6122"
+ "hash": "1fe00296",
+ "number": "6182"
}
}
]